shithub: openh264

Download patch

ref: 0d71326adbfc38610a50f2e20b6863b8bf36d2c9
parent: e3e83832299bd2c2e3e2582fbd88c0b985afeaf7
parent: 29021a871d0fd74965a1b64102fc14dd3c4c3608
author: sijchen <sijchen@cisco.com>
date: Thu Jul 26 07:20:37 EDT 2018

Merge pull request #2991 from xiaotianshi2/master

B-frame decoding support for Main and High Profile with two test cases.

diff: cannot open b/codec/vsproject//null: file does not exist: 'b/codec/vsproject//null'
--- a/codec/api/svc/codec_api.h
+++ b/codec/api/svc/codec_api.h
@@ -1,576 +1,591 @@
-/*!
- *@page License
- *
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-#ifndef WELS_VIDEO_CODEC_SVC_API_H__
-#define WELS_VIDEO_CODEC_SVC_API_H__
-
-#ifndef __cplusplus
-#if defined(_MSC_VER) && (_MSC_VER < 1800)
-typedef unsigned char bool;
-#else
-#include <stdbool.h>
-#endif
-#endif
-
-#include "codec_app_def.h"
-#include "codec_def.h"
-
-#if defined(_WIN32) || defined(__cdecl)
-#define EXTAPI __cdecl
-#else
-#define EXTAPI
-#endif
-
-/**
-  * @file codec_api.h
-*/
-
-/**
-  * @page Overview
-  *   * This page is for openh264 codec API usage.
-  *   * For how to use the encoder,please refer to page UsageExampleForEncoder
-  *   * For how to use the decoder,please refer to page UsageExampleForDecoder
-  *   * For more detail about ISVEncoder,please refer to page ISVCEnoder
-  *   * For more detail about ISVDecoder,please refer to page ISVCDecoder
-*/
-
-/**
-  * @page DecoderUsageExample
-  *
-  * @brief
-  *   * An example for using the decoder for Decoding only or Parsing only
-  *
-  * Step 1:decoder declaration
-  * @code
-  *
-  *  //decoder declaration
-  *  ISVCDecoder *pSvcDecoder;
-  *  //input: encoded bitstream start position; should include start code prefix
-  *  unsigned char *pBuf =...;
-  *  //input: encoded bit stream length; should include the size of start code prefix
-  *  int iSize =...;
-  *  //output: [0~2] for Y,U,V buffer for Decoding only
-  *  unsigned char *pData[3] =...;
-  *  //in-out: for Decoding only: declare and initialize the output buffer info, this should never co-exist with Parsing only
-  *  SBufferInfo sDstBufInfo;
-  *  memset(&sDstBufInfo, 0, sizeof(SBufferInfo));
-  *  //in-out: for Parsing only: declare and initialize the output bitstream buffer info for parse only, this should never co-exist with Decoding only
-  *  SParserBsInfo sDstParseInfo;
-  *  memset(&sDstParseInfo, 0, sizeof(SParserBsInfo));
-  *  sDstParseInfo.pDstBuff = new unsigned char[PARSE_SIZE]; //In Parsing only, allocate enough buffer to save transcoded bitstream for a frame
-  *
-  * @endcode
-  *
-  * Step 2:decoder creation
-  * @code
-  *  CreateDecoder(pSvcDecoder);
-  * @endcode
-  *
-  * Step 3:declare required parameter, used to differentiate Decoding only and Parsing only
-  * @code
-  *  SDecodingParam sDecParam = {0};
-  *  sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_AVC;
-  *  //for Parsing only, the assignment is mandatory
-  *  sDecParam.bParseOnly = true;
-  * @endcode
-  *
-  * Step 4:initialize the parameter and decoder context, allocate memory
-  * @code
-  *  Initialize(&sDecParam);
-  * @endcode
-  *
-  * Step 5:do actual decoding process in slice level;
-  *        this can be done in a loop until data ends
-  * @code
-  *  //for Decoding only
-  *  iRet = DecodeFrameNoDelay(pBuf, iSize, pData, &sDstBufInfo);
-  *  //or
-  *  iRet = DecodeFrame2(pBuf, iSize, pData, &sDstBufInfo);
-  *  //for Parsing only
-  *  iRet = DecodeParser(pBuf, iSize, &sDstParseInfo);
-  *  //decode failed
-  *  If (iRet != 0){
-  *      RequestIDR or something like that.
-  *  }
-  *  //for Decoding only, pData can be used for render.
-  *  if (sDstBufInfo.iBufferStatus==1){
-  *      output pData[0], pData[1], pData[2];
-  *  }
-  * //for Parsing only, sDstParseInfo can be used for, e.g., HW decoding
-  *  if (sDstBufInfo.iNalNum > 0){
-  *      Hardware decoding sDstParseInfo;
-  *  }
-  *  //no-delay decoding can be realized by directly calling DecodeFrameNoDelay(), which is the recommended usage.
-  *  //no-delay decoding can also be realized by directly calling DecodeFrame2() again with NULL input, as in the following. In this case, decoder would immediately reconstruct the input data. This can also be used similarly for Parsing only. Consequent decoding error and output indication should also be considered as above.
-  *  iRet = DecodeFrame2(NULL, 0, pData, &sDstBufInfo);
-  *  judge iRet, sDstBufInfo.iBufferStatus ...
-  * @endcode
-  *
-  * Step 6:uninitialize the decoder and memory free
-  * @code
-  *  Uninitialize();
-  * @endcode
-  *
-  * Step 7:destroy the decoder
-  * @code
-  *  DestroyDecoder();
-  * @endcode
-  *
-*/
-
-/**
-  * @page EncoderUsageExample1
-  *
-  * @brief
-  *  * An example for using encoder with basic parameter
-  *
-  * Step1:setup encoder
-  * @code
-  *  int rv = WelsCreateSVCEncoder (&encoder_);
-  *  ASSERT_EQ (0, rv);
-  *  ASSERT_TRUE (encoder_ != NULL);
-  * @endcode
-  *
-  * Step2:initilize with basic parameter
-  * @code
-  *  SEncParamBase param;
-  *  memset (&param, 0, sizeof (SEncParamBase));
-  *  param.iUsageType = usageType;
-  *  param.fMaxFrameRate = frameRate;
-  *  param.iPicWidth = width;
-  *  param.iPicHeight = height;
-  *  param.iTargetBitrate = 5000000;
-  *  encoder_->Initialize (&param);
-  * @endcode
-  *
-  * Step3:set option, set option during encoding process
-  * @code
-  *  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &g_LevelSetting);
-  *  int videoFormat = videoFormatI420;
-  *  encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat);
-  * @endcode
-  *
-  * Step4: encode and  store ouput bistream
-  * @code
-  *  int frameSize = width * height * 3 / 2;
-  *  BufferedData buf;
-  *  buf.SetLength (frameSize);
-  *  ASSERT_TRUE (buf.Length() == (size_t)frameSize);
-  *  SFrameBSInfo info;
-  *  memset (&info, 0, sizeof (SFrameBSInfo));
-  *  SSourcePicture pic;
-  *  memset (&pic, 0, sizeof (SsourcePicture));
-  *  pic.iPicWidth = width;
-  *  pic.iPicHeight = height;
-  *  pic.iColorFormat = videoFormatI420;
-  *  pic.iStride[0] = pic.iPicWidth;
-  *  pic.iStride[1] = pic.iStride[2] = pic.iPicWidth >> 1;
-  *  pic.pData[0] = buf.data();
-  *  pic.pData[1] = pic.pData[0] + width * height;
-  *  pic.pData[2] = pic.pData[1] + (width * height >> 2);
-  *  for(int num = 0;num<total_num;num++) {
-  *     //prepare input data
-  *     rv = encoder_->EncodeFrame (&pic, &info);
-  *     ASSERT_TRUE (rv == cmResultSuccess);
-  *     if (info.eFrameType != videoFrameTypeSkip && cbk != NULL) {
-  *      //output bitstream
-  *     }
-  *  }
-  * @endcode
-  *
-  * Step5:teardown encoder
-  * @code
-  *  if (encoder_) {
-  *      encoder_->Uninitialize();
-  *      WelsDestroySVCEncoder (encoder_);
-  *  }
-  * @endcode
-  *
-  */
-
-/**
-  * @page EncoderUsageExample2
-  *
-  * @brief
-  *     * An example for using the encoder with extension parameter.
-  *     * The same operation on Step 1,3,4,5 with Example-1
-  *
-  * Step 2:initialize with extension parameter
-  * @code
-  *  SEncParamExt param;
-  *  encoder->GetDefaultParams (&param);
-  *  param.iUsageType = usageType;
-  *  param.fMaxFrameRate = frameRate;
-  *  param.iPicWidth = width;
-  *  param.iPicHeight = height;
-  *  param.iTargetBitrate = 5000000;
-  *  param.bEnableDenoise = denoise;
-  *  param.iSpatialLayerNum = layers;
-  *  //SM_DYN_SLICE don't support multi-thread now
-  *  if (sliceMode != SM_SINGLE_SLICE && sliceMode != SM_DYN_SLICE)
-  *      param.iMultipleThreadIdc = 2;
-  *
-  *  for (int i = 0; i < param.iSpatialLayerNum; i++) {
-  *      param.sSpatialLayers[i].iVideoWidth = width >> (param.iSpatialLayerNum - 1 - i);
-  *      param.sSpatialLayers[i].iVideoHeight = height >> (param.iSpatialLayerNum - 1 - i);
-  *      param.sSpatialLayers[i].fFrameRate = frameRate;
-  *      param.sSpatialLayers[i].iSpatialBitrate = param.iTargetBitrate;
-  *
-  *      param.sSpatialLayers[i].sSliceCfg.uiSliceMode = sliceMode;
-  *      if (sliceMode == SM_DYN_SLICE) {
-  *          param.sSpatialLayers[i].sSliceCfg.sSliceArgument.uiSliceSizeConstraint = 600;
-  *          param.uiMaxNalSize = 1500;
-  *      }
-  *  }
-  *  param.iTargetBitrate *= param.iSpatialLayerNum;
-  *  encoder_->InitializeExt (&param);
-  *  int videoFormat = videoFormatI420;
-  *  encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat);
-  *
-  * @endcode
-  */
-
-
-
-
-#ifdef __cplusplus
-/**
-* @brief Endocder definition
-*/
-class ISVCEncoder {
- public:
-  /**
-  * @brief  Initialize the encoder
-  * @param  pParam  basic encoder parameter
-  * @return CM_RETURN: 0 - success; otherwise - failed;
-  */
-  virtual int EXTAPI Initialize (const SEncParamBase* pParam) = 0;
-
-  /**
-  * @brief  Initilaize encoder by using extension parameters.
-  * @param  pParam  extension parameter for encoder
-  * @return CM_RETURN: 0 - success; otherwise - failed;
-  */
-  virtual int EXTAPI InitializeExt (const SEncParamExt* pParam) = 0;
-
-  /**
-  * @brief   Get the default extension parameters.
-  *          If you want to change some parameters of encoder, firstly you need to get the default encoding parameters,
-  *          after that you can change part of parameters you want to.
-  * @param   pParam  extension parameter for encoder
-  * @return  CM_RETURN: 0 - success; otherwise - failed;
-  * */
-  virtual int EXTAPI GetDefaultParams (SEncParamExt* pParam) = 0;
-  /// uninitialize the encoder
-  virtual int EXTAPI Uninitialize() = 0;
-
-  /**
-  * @brief Encode one frame
-  * @param kpSrcPic the pointer to the source luminance plane
-  *        chrominance data:
-  *        CbData = kpSrc  +  m_iMaxPicWidth * m_iMaxPicHeight;
-  *        CrData = CbData + (m_iMaxPicWidth * m_iMaxPicHeight)/4;
-  *        the application calling this interface needs to ensure the data validation between the location
-  * @param pBsInfo output bit stream
-  * @return  0 - success; otherwise -failed;
-  */
-  virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo) = 0;
-
-  /**
-  * @brief  Encode the parameters from output bit stream
-  * @param  pBsInfo output bit stream
-  * @return 0 - success; otherwise - failed;
-  */
-  virtual int EXTAPI EncodeParameterSets (SFrameBSInfo* pBsInfo) = 0;
-
-  /**
-  * @brief  Force encoder to encoder frame as IDR if bIDR set as true
-  * @param  bIDR true: force encoder to encode frame as IDR frame;false, return 1 and nothing to do
-  * @return 0 - success; otherwise - failed;
-  */
-  virtual int EXTAPI ForceIntraFrame (bool bIDR,int iLayerId = -1) = 0;
-
-  /**
-  * @brief   Set option for encoder, detail option type, please refer to enumurate ENCODER_OPTION.
-  * @param   pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
-  * @return  CM_RETURN: 0 - success; otherwise - failed;
-  */
-  virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) = 0;
-
-  /**
-  * @brief   Set option for encoder, detail option type, please refer to enumurate ENCODER_OPTION.
-  * @param   pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
-  * @return  CM_RETURN: 0 - success; otherwise - failed;
-  */
-  virtual int EXTAPI GetOption (ENCODER_OPTION eOptionId, void* pOption) = 0;
-  virtual ~ISVCEncoder() {}
-};
-
-
-
-/**
-* @brief Decoder definition
-*/
-class ISVCDecoder {
- public:
-
-  /**
-  * @brief  Initilaize decoder
-  * @param  pParam  parameter for decoder
-  * @return 0 - success; otherwise - failed;
-  */
-  virtual long EXTAPI Initialize (const SDecodingParam* pParam) = 0;
-
-  /// Uninitialize the decoder
-  virtual long EXTAPI Uninitialize() = 0;
-
-  /**
-  * @brief   Decode one frame
-  * @param   pSrc the h264 stream to be decoded
-  * @param   iSrcLen the length of h264 stream
-  * @param   ppDst buffer pointer of decoded data (YUV)
-  * @param   pStride output stride
-  * @param   iWidth output width
-  * @param   iHeight output height
-  * @return  0 - success; otherwise -failed;
-  */
-  virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* pSrc,
-      const int iSrcLen,
-      unsigned char** ppDst,
-      int* pStride,
-      int& iWidth,
-      int& iHeight) = 0;
-
-/**
-  * @brief    For slice level DecodeFrameNoDelay() (4 parameters input),
-  *           whatever the function return value is, the output data
-  *           of I420 format will only be available when pDstInfo->iBufferStatus == 1,.
-  *           This function will parse and reconstruct the input frame immediately if it is complete
-  *           It is recommended as the main decoding function for H.264/AVC format input
-  * @param   pSrc the h264 stream to be decoded
-  * @param   iSrcLen the length of h264 stream
-  * @param   ppDst buffer pointer of decoded data (YUV)
-  * @param   pDstInfo information provided to API(width, height, etc.)
-  * @return  0 - success; otherwise -failed;
-  */
-  virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* pSrc,
-      const int iSrcLen,
-      unsigned char** ppDst,
-      SBufferInfo* pDstInfo) = 0;
-
-  /**
-  * @brief    For slice level DecodeFrame2() (4 parameters input),
-  *           whatever the function return value is, the output data
-  *           of I420 format will only be available when pDstInfo->iBufferStatus == 1,.
-  *           (e.g., in multi-slice cases, only when the whole picture
-  *           is completely reconstructed, this variable would be set equal to 1.)
-  * @param   pSrc the h264 stream to be decoded
-  * @param   iSrcLen the length of h264 stream
-  * @param   ppDst buffer pointer of decoded data (YUV)
-  * @param   pDstInfo information provided to API(width, height, etc.)
-  * @return  0 - success; otherwise -failed;
-  */
-  virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* pSrc,
-      const int iSrcLen,
-      unsigned char** ppDst,
-      SBufferInfo* pDstInfo) = 0;
-
-  /**
-  * @brief   This function parse input bitstream only, and rewrite possible SVC syntax to AVC syntax
-  * @param   pSrc the h264 stream to be decoded
-  * @param   iSrcLen the length of h264 stream
-  * @param   pDstInfo bit stream info
-  * @return  0 - success; otherwise -failed;
-  */
-  virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* pSrc,
-      const int iSrcLen,
-      SParserBsInfo* pDstInfo) = 0;
-
-  /**
-  * @brief   This API does not work for now!! This is for future use to support non-I420 color format output.
-  * @param   pSrc the h264 stream to be decoded
-  * @param   iSrcLen the length of h264 stream
-  * @param   pDst buffer pointer of decoded data (YUV)
-  * @param   iDstStride output stride
-  * @param   iDstLen bit stream info
-  * @param   iWidth output width
-  * @param   iHeight output height
-  * @param   iColorFormat output color format
-  * @return  to do ...
-  */
-  virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* pSrc,
-      const int iSrcLen,
-      unsigned char* pDst,
-      int iDstStride,
-      int& iDstLen,
-      int& iWidth,
-      int& iHeight,
-      int& iColorFormat) = 0;
-
-  /**
-  * @brief   Set option for decoder, detail option type, please refer to enumurate DECODER_OPTION.
-  * @param   pOption  option for decoder such as OutDataFormat, Eos Flag, EC method, ...
-  * @return  CM_RETURN: 0 - success; otherwise - failed;
-  */
-  virtual long EXTAPI SetOption (DECODER_OPTION eOptionId, void* pOption) = 0;
-
-  /**
-  * @brief   Get option for decoder, detail option type, please refer to enumurate DECODER_OPTION.
-  * @param   pOption  option for decoder such as OutDataFormat, Eos Flag, EC method, ...
-  * @return  CM_RETURN: 0 - success; otherwise - failed;
-  */
-  virtual long EXTAPI GetOption (DECODER_OPTION eOptionId, void* pOption) = 0;
-  virtual ~ISVCDecoder() {}
-};
-
-
-extern "C"
-{
-#else
-
-typedef struct ISVCEncoderVtbl ISVCEncoderVtbl;
-typedef const ISVCEncoderVtbl* ISVCEncoder;
-struct ISVCEncoderVtbl {
-
-int (*Initialize) (ISVCEncoder*, const SEncParamBase* pParam);
-int (*InitializeExt) (ISVCEncoder*, const SEncParamExt* pParam);
-
-int (*GetDefaultParams) (ISVCEncoder*, SEncParamExt* pParam);
-
-int (*Uninitialize) (ISVCEncoder*);
-
-int (*EncodeFrame) (ISVCEncoder*, const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo);
-int (*EncodeParameterSets) (ISVCEncoder*, SFrameBSInfo* pBsInfo);
-
-int (*ForceIntraFrame) (ISVCEncoder*, bool bIDR);
-
-int (*SetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption);
-int (*GetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption);
-};
-
-typedef struct ISVCDecoderVtbl ISVCDecoderVtbl;
-typedef const ISVCDecoderVtbl* ISVCDecoder;
-struct ISVCDecoderVtbl {
-long (*Initialize) (ISVCDecoder*, const SDecodingParam* pParam);
-long (*Uninitialize) (ISVCDecoder*);
-
-DECODING_STATE (*DecodeFrame) (ISVCDecoder*, const unsigned char* pSrc,
-                               const int iSrcLen,
-                               unsigned char** ppDst,
-                               int* pStride,
-                               int* iWidth,
-                               int* iHeight);
-
-DECODING_STATE (*DecodeFrameNoDelay) (ISVCDecoder*, const unsigned char* pSrc,
-                                const int iSrcLen,
-                                unsigned char** ppDst,
-                                SBufferInfo* pDstInfo);
-
-DECODING_STATE (*DecodeFrame2) (ISVCDecoder*, const unsigned char* pSrc,
-                                const int iSrcLen,
-                                unsigned char** ppDst,
-                                SBufferInfo* pDstInfo);
-
-DECODING_STATE (*DecodeParser) (ISVCDecoder*, const unsigned char* pSrc,
-                                const int iSrcLen,
-                                SParserBsInfo* pDstInfo);
-
-DECODING_STATE (*DecodeFrameEx) (ISVCDecoder*, const unsigned char* pSrc,
-                                 const int iSrcLen,
-                                 unsigned char* pDst,
-                                 int iDstStride,
-                                 int* iDstLen,
-                                 int* iWidth,
-                                 int* iHeight,
-                                 int* iColorFormat);
-
-long (*SetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption);
-long (*GetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption);
-};
-#endif
-
-typedef void (*WelsTraceCallback) (void* ctx, int level, const char* string);
-
-/** @brief   Create encoder
- *  @param   ppEncoder encoder
- *  @return  0 - success; otherwise - failed;
-*/
-int  WelsCreateSVCEncoder (ISVCEncoder** ppEncoder);
-
-
-/** @brief   Destroy encoder
-*   @param   pEncoder encoder
- *  @return  void
-*/
-void WelsDestroySVCEncoder (ISVCEncoder* pEncoder);
-
-
-/** @brief   Get the capability of decoder
- *  @param   pDecCapability  decoder capability
- *  @return  0 - success; otherwise - failed;
-*/
-int WelsGetDecoderCapability (SDecoderCapability* pDecCapability);
-
-
-/** @brief   Create decoder
- *  @param   ppDecoder decoder
- *  @return  0 - success; otherwise - failed;
-*/
-long WelsCreateDecoder (ISVCDecoder** ppDecoder);
-
-
-/** @brief   Destroy decoder
- *  @param   pDecoder  decoder
- *  @return  void
-*/
-void WelsDestroyDecoder (ISVCDecoder* pDecoder);
-
-/** @brief   Get codec version
- *           Note, old versions of Mingw (GCC < 4.7) are buggy and use an
- *           incorrect/different ABI for calling this function, making it
- *           incompatible with MSVC builds.
- *  @return  The linked codec version
-*/
-OpenH264Version WelsGetCodecVersion (void);
-
-/** @brief   Get codec version
- *  @param   pVersion  struct to fill in with the version
-*/
-void WelsGetCodecVersionEx (OpenH264Version *pVersion);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif//WELS_VIDEO_CODEC_SVC_API_H__
+/*!
+ *@page License
+ *
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef WELS_VIDEO_CODEC_SVC_API_H__
+#define WELS_VIDEO_CODEC_SVC_API_H__
+
+#ifndef __cplusplus
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
+typedef unsigned char bool;
+#else
+#include <stdbool.h>
+#endif
+#endif
+
+#include "codec_app_def.h"
+#include "codec_def.h"
+
+#if defined(_WIN32) || defined(__cdecl)
+#define EXTAPI __cdecl
+#else
+#define EXTAPI
+#endif
+
+/**
+  * @file codec_api.h
+*/
+
+/**
+  * @page Overview
+  *   * This page is for openh264 codec API usage.
+  *   * For how to use the encoder, please refer to pages EncoderUsageExample1 and EncoderUsageExample2
+  *   * For how to use the decoder, please refer to page DecoderUsageExample
+  *   * For more detail about ISVCEncoder, please refer to page ISVCEncoder
+  *   * For more detail about ISVCDecoder, please refer to page ISVCDecoder
+*/
+
+/**
+  * @page DecoderUsageExample
+  *
+  * @brief
+  *   * An example for using the decoder for Decoding only or Parsing only
+  *
+  * Step 1:decoder declaration
+  * @code
+  *
+  *  //decoder declaration
+  *  ISVCDecoder *pSvcDecoder;
+  *  //input: encoded bitstream start position; should include start code prefix
+  *  unsigned char *pBuf =...;
+  *  //input: encoded bit stream length; should include the size of start code prefix
+  *  int iSize =...;
+  *  //output: [0~2] for Y,U,V buffer for Decoding only
+  *  unsigned char *pData[3] =...;
+  *  //in-out: for Decoding only: declare and initialize the output buffer info, this should never co-exist with Parsing only
+  *  SBufferInfo sDstBufInfo;
+  *  memset(&sDstBufInfo, 0, sizeof(SBufferInfo));
+  *  //in-out: for Parsing only: declare and initialize the output bitstream buffer info for parse only, this should never co-exist with Decoding only
+  *  SParserBsInfo sDstParseInfo;
+  *  memset(&sDstParseInfo, 0, sizeof(SParserBsInfo));
+  *  sDstParseInfo.pDstBuff = new unsigned char[PARSE_SIZE]; //In Parsing only, allocate enough buffer to save transcoded bitstream for a frame
+  *
+  * @endcode
+  *
+  * Step 2:decoder creation
+  * @code
+  *  CreateDecoder(pSvcDecoder);
+  * @endcode
+  *
+  * Step 3:declare required parameter, used to differentiate Decoding only and Parsing only
+  * @code
+  *  SDecodingParam sDecParam = {0};
+  *  sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_AVC;
+  *  //for Parsing only, the assignment is mandatory
+  *  sDecParam.bParseOnly = true;
+  * @endcode
+  *
+  * Step 4:initialize the parameter and decoder context, allocate memory
+  * @code
+  *  Initialize(&sDecParam);
+  * @endcode
+  *
+  * Step 5:do actual decoding process in slice level;
+  *        this can be done in a loop until data ends
+  * @code
+  *  //for Decoding only
+  *  iRet = DecodeFrameNoDelay(pBuf, iSize, pData, &sDstBufInfo);
+  *  //or
+  *  iRet = DecodeFrame2(pBuf, iSize, pData, &sDstBufInfo);
+  *  //for Parsing only
+  *  iRet = DecodeParser(pBuf, iSize, &sDstParseInfo);
+  *  //decode failed
+  *  if (iRet != 0){
+  *      RequestIDR or something like that.
+  *  }
+  *  //for Decoding only, pData can be used for render.
+  *  if (sDstBufInfo.iBufferStatus==1){
+  *      output pData[0], pData[1], pData[2];
+  *  }
+  *  //for Parsing only, sDstParseInfo can be used for, e.g., HW decoding
+  *  if (sDstParseInfo.iNalNum > 0){
+  *      Hardware decoding sDstParseInfo;
+  *  }
+  *  //no-delay decoding can be realized by directly calling DecodeFrameNoDelay(), which is the recommended usage.
+  *  //no-delay decoding can also be realized by directly calling DecodeFrame2() again with NULL input, as in the following. In this case, decoder would immediately reconstruct the input data. This can also be used similarly for Parsing only. Consequent decoding error and output indication should also be considered as above.
+  *  iRet = DecodeFrame2(NULL, 0, pData, &sDstBufInfo);
+  *  judge iRet, sDstBufInfo.iBufferStatus ...
+  * @endcode
+  *
+  * Step 6:uninitialize the decoder and memory free
+  * @code
+  *  Uninitialize();
+  * @endcode
+  *
+  * Step 7:destroy the decoder
+  * @code
+  *  DestroyDecoder();
+  * @endcode
+  *
+*/
+
+/**
+  * @page EncoderUsageExample1
+  *
+  * @brief
+  *  * An example for using encoder with basic parameter
+  *
+  * Step1:setup encoder
+  * @code
+  *  int rv = WelsCreateSVCEncoder (&encoder_);
+  *  ASSERT_EQ (0, rv);
+  *  ASSERT_TRUE (encoder_ != NULL);
+  * @endcode
+  *
+  * Step2:initialize with basic parameter
+  * @code
+  *  SEncParamBase param;
+  *  memset (&param, 0, sizeof (SEncParamBase));
+  *  param.iUsageType = usageType;
+  *  param.fMaxFrameRate = frameRate;
+  *  param.iPicWidth = width;
+  *  param.iPicHeight = height;
+  *  param.iTargetBitrate = 5000000;
+  *  encoder_->Initialize (&param);
+  * @endcode
+  *
+  * Step3:set option, set option during encoding process
+  * @code
+  *  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &g_LevelSetting);
+  *  int videoFormat = videoFormatI420;
+  *  encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat);
+  * @endcode
+  *
+  * Step4: encode and store output bitstream
+  * @code
+  *  int frameSize = width * height * 3 / 2;
+  *  BufferedData buf;
+  *  buf.SetLength (frameSize);
+  *  ASSERT_TRUE (buf.Length() == (size_t)frameSize);
+  *  SFrameBSInfo info;
+  *  memset (&info, 0, sizeof (SFrameBSInfo));
+  *  SSourcePicture pic;
+  *  memset (&pic, 0, sizeof (SSourcePicture));
+  *  pic.iPicWidth = width;
+  *  pic.iPicHeight = height;
+  *  pic.iColorFormat = videoFormatI420;
+  *  pic.iStride[0] = pic.iPicWidth;
+  *  pic.iStride[1] = pic.iStride[2] = pic.iPicWidth >> 1;
+  *  pic.pData[0] = buf.data();
+  *  pic.pData[1] = pic.pData[0] + width * height;
+  *  pic.pData[2] = pic.pData[1] + (width * height >> 2);
+  *  for(int num = 0;num<total_num;num++) {
+  *     //prepare input data
+  *     rv = encoder_->EncodeFrame (&pic, &info);
+  *     ASSERT_TRUE (rv == cmResultSuccess);
+  *     if (info.eFrameType != videoFrameTypeSkip && cbk != NULL) {
+  *      //output bitstream
+  *     }
+  *  }
+  * @endcode
+  *
+  * Step5:teardown encoder
+  * @code
+  *  if (encoder_) {
+  *      encoder_->Uninitialize();
+  *      WelsDestroySVCEncoder (encoder_);
+  *  }
+  * @endcode
+  *
+  */
+
+/**
+  * @page EncoderUsageExample2
+  *
+  * @brief
+  *     * An example for using the encoder with extension parameter.
+  *     * The same operation on Step 1,3,4,5 with Example-1
+  *
+  * Step 2:initialize with extension parameter
+  * @code
+  *  SEncParamExt param;
+  *  encoder_->GetDefaultParams (&param);
+  *  param.iUsageType = usageType;
+  *  param.fMaxFrameRate = frameRate;
+  *  param.iPicWidth = width;
+  *  param.iPicHeight = height;
+  *  param.iTargetBitrate = 5000000;
+  *  param.bEnableDenoise = denoise;
+  *  param.iSpatialLayerNum = layers;
+  *  //SM_DYN_SLICE doesn't support multi-thread now
+  *  if (sliceMode != SM_SINGLE_SLICE && sliceMode != SM_DYN_SLICE)
+  *      param.iMultipleThreadIdc = 2;
+  *
+  *  for (int i = 0; i < param.iSpatialLayerNum; i++) {
+  *      param.sSpatialLayers[i].iVideoWidth = width >> (param.iSpatialLayerNum - 1 - i);
+  *      param.sSpatialLayers[i].iVideoHeight = height >> (param.iSpatialLayerNum - 1 - i);
+  *      param.sSpatialLayers[i].fFrameRate = frameRate;
+  *      param.sSpatialLayers[i].iSpatialBitrate = param.iTargetBitrate;
+  *
+  *      param.sSpatialLayers[i].sSliceCfg.uiSliceMode = sliceMode;
+  *      if (sliceMode == SM_DYN_SLICE) {
+  *          param.sSpatialLayers[i].sSliceCfg.sSliceArgument.uiSliceSizeConstraint = 600;
+  *          param.uiMaxNalSize = 1500;
+  *      }
+  *  }
+  *  param.iTargetBitrate *= param.iSpatialLayerNum;
+  *  encoder_->InitializeExt (&param);
+  *  int videoFormat = videoFormatI420;
+  *  encoder_->SetOption (ENCODER_OPTION_DATAFORMAT, &videoFormat);
+  *
+  * @endcode
+  */
+
+
+
+
+#ifdef __cplusplus
+/**
+* @brief Encoder definition
+*/
+class ISVCEncoder {
+ public:
+  /**
+  * @brief  Initialize the encoder
+  * @param  pParam  basic encoder parameter
+  * @return CM_RETURN: 0 - success; otherwise - failed;
+  */
+  virtual int EXTAPI Initialize (const SEncParamBase* pParam) = 0;
+
+  /**
+  * @brief  Initialize encoder by using extension parameters.
+  * @param  pParam  extension parameter for encoder
+  * @return CM_RETURN: 0 - success; otherwise - failed;
+  */
+  virtual int EXTAPI InitializeExt (const SEncParamExt* pParam) = 0;
+
+  /**
+  * @brief   Get the default extension parameters.
+  *          If you want to change some parameters of encoder, firstly you need to get the default encoding parameters,
+  *          after that you can change part of parameters you want to.
+  * @param   pParam  extension parameter for encoder
+  * @return  CM_RETURN: 0 - success; otherwise - failed;
+  * */
+  virtual int EXTAPI GetDefaultParams (SEncParamExt* pParam) = 0;
+  /// uninitialize the encoder
+  virtual int EXTAPI Uninitialize() = 0;
+
+  /**
+  * @brief Encode one frame
+  * @param kpSrcPic the pointer to the source luminance plane
+  *        chrominance data:
+  *        CbData = kpSrc  +  m_iMaxPicWidth * m_iMaxPicHeight;
+  *        CrData = CbData + (m_iMaxPicWidth * m_iMaxPicHeight)/4;
+  *        the application calling this interface needs to ensure the data validation between the location
+  * @param pBsInfo output bit stream
+  * @return  0 - success; otherwise -failed;
+  */
+  virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo) = 0;
+
+  /**
+  * @brief  Encode the parameters from output bit stream
+  * @param  pBsInfo output bit stream
+  * @return 0 - success; otherwise - failed;
+  */
+  virtual int EXTAPI EncodeParameterSets (SFrameBSInfo* pBsInfo) = 0;
+
+  /**
+  * @brief  Force encoder to encoder frame as IDR if bIDR set as true
+  * @param  bIDR true: force encoder to encode frame as IDR frame;false, return 1 and nothing to do
+  * @return 0 - success; otherwise - failed;
+  */
+  virtual int EXTAPI ForceIntraFrame (bool bIDR, int iLayerId = -1) = 0;
+
+  /**
+  * @brief   Set option for encoder, detail option type, please refer to enumurate ENCODER_OPTION.
+  * @param   pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
+  * @return  CM_RETURN: 0 - success; otherwise - failed;
+  */
+  virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) = 0;
+
+  /**
+  * @brief   Set option for encoder, detail option type, please refer to enumurate ENCODER_OPTION.
+  * @param   pOption option for encoder such as InDataFormat, IDRInterval, SVC Encode Param, Frame Rate, Bitrate,...
+  * @return  CM_RETURN: 0 - success; otherwise - failed;
+  */
+  virtual int EXTAPI GetOption (ENCODER_OPTION eOptionId, void* pOption) = 0;
+  virtual ~ISVCEncoder() {}
+};
+
+
+
+/**
+* @brief Decoder definition
+*/
+class ISVCDecoder {
+ public:
+
+  /**
+  * @brief  Initialize decoder
+  * @param  pParam  parameter for decoder
+  * @return 0 - success; otherwise - failed;
+  */
+  virtual long EXTAPI Initialize (const SDecodingParam* pParam) = 0;
+
+  /// Uninitialize the decoder
+  virtual long EXTAPI Uninitialize() = 0;
+
+  /**
+  * @brief   Decode one frame
+  * @param   pSrc the h264 stream to be decoded
+  * @param   iSrcLen the length of h264 stream
+  * @param   ppDst buffer pointer of decoded data (YUV)
+  * @param   pStride output stride
+  * @param   iWidth output width
+  * @param   iHeight output height
+  * @return  0 - success; otherwise - failed;
+  */
+  virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* pSrc,
+      const int iSrcLen,
+      unsigned char** ppDst,
+      int* pStride,
+      int& iWidth,
+      int& iHeight) = 0;
+
+  /**
+    * @brief    For slice level DecodeFrameNoDelay() (4 parameters input),
+    *           whatever the function return value is, the output data
+    *           of I420 format will only be available when pDstInfo->iBufferStatus == 1.
+    *           This function will parse and reconstruct the input frame immediately if it is complete.
+    *           It is recommended as the main decoding function for H.264/AVC format input.
+    * @param   pSrc the h264 stream to be decoded
+    * @param   iSrcLen the length of h264 stream
+    * @param   ppDst buffer pointer of decoded data (YUV)
+    * @param   pDstInfo information provided to API(width, height, etc.)
+    * @return  0 - success; otherwise - failed;
+    */
+  virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* pSrc,
+      const int iSrcLen,
+      unsigned char** ppDst,
+      SBufferInfo* pDstInfo) = 0;
+
+  /**
+  * @brief    For slice level DecodeFrame2() (4 parameters input),
+  *           whatever the function return value is, the output data
+  *           of I420 format will only be available when pDstInfo->iBufferStatus == 1.
+  *           (e.g., in multi-slice cases, only when the whole picture
+  *           is completely reconstructed, this variable would be set equal to 1.)
+  * @param   pSrc the h264 stream to be decoded
+  * @param   iSrcLen the length of h264 stream
+  * @param   ppDst buffer pointer of decoded data (YUV)
+  * @param   pDstInfo information provided to API(width, height, etc.)
+  * @return  0 - success; otherwise - failed;
+  */
+  virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* pSrc,
+      const int iSrcLen,
+      unsigned char** ppDst,
+      SBufferInfo* pDstInfo) = 0;
+
+
+  /**
+  * @brief   This function gets a decoded ready frame remaining in buffers after the last frame has been decoded.
+  * Use GetOption with option DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER to get the number of frames remaining in buffers.
+  * Note that it is only applicable for profile_idc != 66
+  * @param   ppDst buffer pointer of decoded data (YUV)
+  * @param   pDstInfo information provided to API(width, height, etc.)
+  * @return  0 - success; otherwise - failed;
+  */
+  virtual DECODING_STATE EXTAPI FlushFrame (unsigned char** ppDst,
+      SBufferInfo* pDstInfo) = 0;
+
+  /**
+  * @brief   This function parses the input bitstream only, and rewrites possible SVC syntax to AVC syntax
+  * @param   pSrc the h264 stream to be decoded
+  * @param   iSrcLen the length of h264 stream
+  * @param   pDstInfo bit stream info
+  * @return  0 - success; otherwise - failed;
+  */
+  virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* pSrc,
+      const int iSrcLen,
+      SParserBsInfo* pDstInfo) = 0;
+
+  /**
+  * @brief   This API does not work for now!! This is for future use to support non-I420 color format output.
+  * @param   pSrc the h264 stream to be decoded
+  * @param   iSrcLen the length of h264 stream
+  * @param   pDst buffer pointer of decoded data (YUV)
+  * @param   iDstStride output stride
+  * @param   iDstLen bit stream info
+  * @param   iWidth output width
+  * @param   iHeight output height
+  * @param   iColorFormat output color format
+  * @return  to do ...
+  */
+  virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* pSrc,
+      const int iSrcLen,
+      unsigned char* pDst,
+      int iDstStride,
+      int& iDstLen,
+      int& iWidth,
+      int& iHeight,
+      int& iColorFormat) = 0;
+
+  /**
+  * @brief   Set option for decoder, detail option type, please refer to enumeration DECODER_OPTION.
+  * @param   pOption  option for decoder such as OutDataFormat, Eos Flag, EC method, ...
+  * @return  CM_RETURN: 0 - success; otherwise - failed;
+  */
+  virtual long EXTAPI SetOption (DECODER_OPTION eOptionId, void* pOption) = 0;
+
+  /**
+  * @brief   Get option for decoder, detail option type, please refer to enumeration DECODER_OPTION.
+  * @param   pOption  option for decoder such as OutDataFormat, Eos Flag, EC method, ...
+  * @return  CM_RETURN: 0 - success; otherwise - failed;
+  */
+  virtual long EXTAPI GetOption (DECODER_OPTION eOptionId, void* pOption) = 0;
+  virtual ~ISVCDecoder() {}
+};
+
+
+extern "C"
+{
+#else
+
+typedef struct ISVCEncoderVtbl ISVCEncoderVtbl;
+typedef const ISVCEncoderVtbl* ISVCEncoder;
+struct ISVCEncoderVtbl {
+/* Function-pointer table used when __cplusplus is not defined; each slot takes the ISVCEncoder handle as its first argument. */
+int (*Initialize) (ISVCEncoder*, const SEncParamBase* pParam);
+int (*InitializeExt) (ISVCEncoder*, const SEncParamExt* pParam);
+
+int (*GetDefaultParams) (ISVCEncoder*, SEncParamExt* pParam);
+
+int (*Uninitialize) (ISVCEncoder*);
+
+int (*EncodeFrame) (ISVCEncoder*, const SSourcePicture* kpSrcPic, SFrameBSInfo* pBsInfo);
+int (*EncodeParameterSets) (ISVCEncoder*, SFrameBSInfo* pBsInfo);
+/* NOTE(review): the C++ ISVCEncoder::ForceIntraFrame also declares `int iLayerId = -1`; this slot has no such parameter -- confirm the C calling contract. */
+int (*ForceIntraFrame) (ISVCEncoder*, bool bIDR);
+
+int (*SetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption);
+int (*GetOption) (ISVCEncoder*, ENCODER_OPTION eOptionId, void* pOption);
+};
+
+typedef struct ISVCDecoderVtbl ISVCDecoderVtbl;
+typedef const ISVCDecoderVtbl* ISVCDecoder;
+struct ISVCDecoderVtbl {
+long (*Initialize) (ISVCDecoder*, const SDecodingParam* pParam);
+long (*Uninitialize) (ISVCDecoder*);
+/* Function-pointer table used when __cplusplus is not defined; parameters declared `int&` in the C++ ISVCDecoder class are `int*` in these slots. */
+DECODING_STATE (*DecodeFrame) (ISVCDecoder*, const unsigned char* pSrc,
+                               const int iSrcLen,
+                               unsigned char** ppDst,
+                               int* pStride,
+                               int* iWidth,
+                               int* iHeight);
+
+DECODING_STATE (*DecodeFrameNoDelay) (ISVCDecoder*, const unsigned char* pSrc,
+                                      const int iSrcLen,
+                                      unsigned char** ppDst,
+                                      SBufferInfo* pDstInfo);
+
+DECODING_STATE (*DecodeFrame2) (ISVCDecoder*, const unsigned char* pSrc,
+                                const int iSrcLen,
+                                unsigned char** ppDst,
+                                SBufferInfo* pDstInfo);
+
+DECODING_STATE (*FlushFrame) (ISVCDecoder*, unsigned char** ppDst,
+                              SBufferInfo* pDstInfo);
+
+DECODING_STATE (*DecodeParser) (ISVCDecoder*, const unsigned char* pSrc,
+                                const int iSrcLen,
+                                SParserBsInfo* pDstInfo);
+
+DECODING_STATE (*DecodeFrameEx) (ISVCDecoder*, const unsigned char* pSrc,
+                                 const int iSrcLen,
+                                 unsigned char* pDst,
+                                 int iDstStride,
+                                 int* iDstLen,
+                                 int* iWidth,
+                                 int* iHeight,
+                                 int* iColorFormat);
+
+long (*SetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption);
+long (*GetOption) (ISVCDecoder*, DECODER_OPTION eOptionId, void* pOption);
+};
+#endif
+
+typedef void (*WelsTraceCallback) (void* ctx, int level, const char* string);
+
+/** @brief   Create encoder
+ *  @param   ppEncoder encoder
+ *  @return  0 - success; otherwise - failed;
+*/
+int  WelsCreateSVCEncoder (ISVCEncoder** ppEncoder);
+
+
+/** @brief   Destroy encoder
+*   @param   pEncoder encoder
+ *  @return  void
+*/
+void WelsDestroySVCEncoder (ISVCEncoder* pEncoder);
+
+
+/** @brief   Get the capability of decoder
+ *  @param   pDecCapability  decoder capability
+ *  @return  0 - success; otherwise - failed;
+*/
+int WelsGetDecoderCapability (SDecoderCapability* pDecCapability);
+
+
+/** @brief   Create decoder
+ *  @param   ppDecoder decoder
+ *  @return  0 - success; otherwise - failed;
+*/
+long WelsCreateDecoder (ISVCDecoder** ppDecoder);
+
+
+/** @brief   Destroy decoder
+ *  @param   pDecoder  decoder
+ *  @return  void
+*/
+void WelsDestroyDecoder (ISVCDecoder* pDecoder);
+
+/** @brief   Get codec version
+ *           Note, old versions of Mingw (GCC < 4.7) are buggy and use an
+ *           incorrect/different ABI for calling this function, making it
+ *           incompatible with MSVC builds.
+ *  @return  The linked codec version
+*/
+OpenH264Version WelsGetCodecVersion (void);
+
+/** @brief   Get codec version
+ *  @param   pVersion  struct to fill in with the version
+*/
+void WelsGetCodecVersionEx (OpenH264Version* pVersion);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif//WELS_VIDEO_CODEC_SVC_API_H__
--- a/codec/api/svc/codec_app_def.h
+++ b/codec/api/svc/codec_app_def.h
@@ -1,801 +1,809 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-
-
-#ifndef WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
-#define WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
-/**
-  * @file  codec_app_def.h
-  * @brief Data and /or structures introduced in Cisco OpenH264 application
-*/
-
-#include "codec_def.h"
-/* Constants */
-#define MAX_TEMPORAL_LAYER_NUM          4
-#define MAX_SPATIAL_LAYER_NUM           4
-#define MAX_QUALITY_LAYER_NUM           4
-
-#define MAX_LAYER_NUM_OF_FRAME          128
-#define MAX_NAL_UNITS_IN_LAYER          128     ///< predetermined here, adjust it later if need
-
-#define MAX_RTP_PAYLOAD_LEN             1000
-#define AVERAGE_RTP_PAYLOAD_LEN         800
-
-
-#define SAVED_NALUNIT_NUM_TMP           ( (MAX_SPATIAL_LAYER_NUM*MAX_QUALITY_LAYER_NUM) + 1 + MAX_SPATIAL_LAYER_NUM )  ///< SPS/PPS + SEI/SSEI + PADDING_NAL
-#define MAX_SLICES_NUM_TMP              ( ( MAX_NAL_UNITS_IN_LAYER - SAVED_NALUNIT_NUM_TMP ) / 3 )
-
-
-#define AUTO_REF_PIC_COUNT  -1          ///< encoder selects the number of reference frame automatically
-#define UNSPECIFIED_BIT_RATE 0          ///< to do: add detail comment
-
-/**
- * @brief Struct of OpenH264 version
- */
-///
-/// E.g. SDK version is 1.2.0.0, major version number is 1, minor version number is 2, and revision number is 0.
-typedef struct  _tagVersion {
-  unsigned int uMajor;                  ///< The major version number
-  unsigned int uMinor;                  ///< The minor version number
-  unsigned int uRevision;               ///< The revision number
-  unsigned int uReserved;               ///< The reserved number, it should be 0.
-} OpenH264Version;
-
-/**
-* @brief Decoding status
-*/
-typedef enum {
-  /**
-  * Errors derived from bitstream parsing
-  */
-  dsErrorFree           = 0x00,   ///< bit stream error-free
-  dsFramePending        = 0x01,   ///< need more throughput to generate a frame output,
-  dsRefLost             = 0x02,   ///< layer lost at reference frame with temporal id 0
-  dsBitstreamError      = 0x04,   ///< error bitstreams(maybe broken internal frame) the decoder cared
-  dsDepLayerLost        = 0x08,   ///< dependented layer is ever lost
-  dsNoParamSets         = 0x10,   ///< no parameter set NALs involved
-  dsDataErrorConcealed  = 0x20,   ///< current data error concealed specified
-
-  /**
-  * Errors derived from logic level
-  */
-  dsInvalidArgument     = 0x1000, ///< invalid argument specified
-  dsInitialOptExpected  = 0x2000, ///< initializing operation is expected
-  dsOutOfMemory         = 0x4000, ///< out of memory due to new request
-  /**
-  * ANY OTHERS?
-  */
-  dsDstBufNeedExpan     = 0x8000  ///< actual picture size exceeds size of dst pBuffer feed in decoder, so need expand its size
-
-} DECODING_STATE;
-
-/**
-* @brief Option types introduced in SVC encoder application
-*/
-typedef enum {
-  ENCODER_OPTION_DATAFORMAT = 0,
-  ENCODER_OPTION_IDR_INTERVAL,               ///< IDR period,0/-1 means no Intra period (only the first frame); lager than 0 means the desired IDR period, must be multiple of (2^temporal_layer)
-  ENCODER_OPTION_SVC_ENCODE_PARAM_BASE,      ///< structure of Base Param
-  ENCODER_OPTION_SVC_ENCODE_PARAM_EXT,       ///< structure of Extension Param
-  ENCODER_OPTION_FRAME_RATE,                 ///< maximal input frame rate, current supported range: MAX_FRAME_RATE = 30,MIN_FRAME_RATE = 1
-  ENCODER_OPTION_BITRATE,
-  ENCODER_OPTION_MAX_BITRATE,
-  ENCODER_OPTION_INTER_SPATIAL_PRED,
-  ENCODER_OPTION_RC_MODE,
-  ENCODER_OPTION_RC_FRAME_SKIP,
-  ENCODER_PADDING_PADDING,                   ///< 0:disable padding;1:padding
-
-  ENCODER_OPTION_PROFILE,                    ///< assgin the profile for each layer
-  ENCODER_OPTION_LEVEL,                      ///< assgin the level for each layer
-  ENCODER_OPTION_NUMBER_REF,                 ///< the number of refererence frame
-  ENCODER_OPTION_DELIVERY_STATUS,            ///< the delivery info which is a feedback from app level
-
-  ENCODER_LTR_RECOVERY_REQUEST,
-  ENCODER_LTR_MARKING_FEEDBACK,
-  ENCODER_LTR_MARKING_PERIOD,
-  ENCODER_OPTION_LTR,                        ///< 0:disable LTR;larger than 0 enable LTR; LTR number is fixed to be 2 in current encoder
-  ENCODER_OPTION_COMPLEXITY,
-
-  ENCODER_OPTION_ENABLE_SSEI,                ///< enable SSEI: true--enable ssei; false--disable ssei
-  ENCODER_OPTION_ENABLE_PREFIX_NAL_ADDING,   ///< enable prefix: true--enable prefix; false--disable prefix
-  ENCODER_OPTION_SPS_PPS_ID_STRATEGY, ///< different stategy in adjust ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
-
-  ENCODER_OPTION_CURRENT_PATH,
-  ENCODER_OPTION_DUMP_FILE,                  ///< dump layer reconstruct frame to a specified file
-  ENCODER_OPTION_TRACE_LEVEL,                ///< trace info based on the trace level
-  ENCODER_OPTION_TRACE_CALLBACK,             ///< a void (*)(void* context, int level, const char* message) function which receives log messages
-  ENCODER_OPTION_TRACE_CALLBACK_CONTEXT,     ///< context info of trace callback
-
-  ENCODER_OPTION_GET_STATISTICS,             ///< read only
-  ENCODER_OPTION_STATISTICS_LOG_INTERVAL,    ///< log interval in millisecond
-
-  ENCODER_OPTION_IS_LOSSLESS_LINK,            ///< advanced algorithmetic settings
-
-  ENCODER_OPTION_BITS_VARY_PERCENTAGE        ///< bit vary percentage
-} ENCODER_OPTION;
-
-/**
-* @brief Option types introduced in decoder application
-*/
-typedef enum {
-  DECODER_OPTION_END_OF_STREAM = 1,     ///< end of stream flag
-  DECODER_OPTION_VCL_NAL,               ///< feedback whether or not have VCL NAL in current AU for application layer
-  DECODER_OPTION_TEMPORAL_ID,           ///< feedback temporal id for application layer
-  DECODER_OPTION_FRAME_NUM,             ///< feedback current decoded frame number
-  DECODER_OPTION_IDR_PIC_ID,            ///< feedback current frame belong to which IDR period
-  DECODER_OPTION_LTR_MARKING_FLAG,      ///< feedback wether current frame mark a LTR
-  DECODER_OPTION_LTR_MARKED_FRAME_NUM,  ///< feedback frame num marked by current Frame
-  DECODER_OPTION_ERROR_CON_IDC,         ///< indicate decoder error concealment method
-  DECODER_OPTION_TRACE_LEVEL,
-  DECODER_OPTION_TRACE_CALLBACK,        ///< a void (*)(void* context, int level, const char* message) function which receives log messages
-  DECODER_OPTION_TRACE_CALLBACK_CONTEXT,///< context info of trace callbac
-
-  DECODER_OPTION_GET_STATISTICS,        ///< feedback decoder statistics
-  DECODER_OPTION_GET_SAR_INFO,          ///< feedback decoder Sample Aspect Ratio info in Vui
-  DECODER_OPTION_PROFILE,               ///< get current AU profile info, only is used in GetOption
-  DECODER_OPTION_LEVEL,                 ///< get current AU level info,only is used in GetOption
-  DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
-  DECODER_OPTION_IS_REF_PIC,             ///< feedback current frame is ref pic or not
-
-} DECODER_OPTION;
-
-/**
-* @brief Enumerate the type of error concealment methods
-*/
-typedef enum {
-  ERROR_CON_DISABLE = 0,
-  ERROR_CON_FRAME_COPY,
-  ERROR_CON_SLICE_COPY,
-  ERROR_CON_FRAME_COPY_CROSS_IDR,
-  ERROR_CON_SLICE_COPY_CROSS_IDR,
-  ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE,
-  ERROR_CON_SLICE_MV_COPY_CROSS_IDR,
-  ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE
-} ERROR_CON_IDC;
-/**
-* @brief Feedback that whether or not have VCL NAL in current AU
-*/
-typedef enum {
-  FEEDBACK_NON_VCL_NAL = 0,
-  FEEDBACK_VCL_NAL,
-  FEEDBACK_UNKNOWN_NAL
-} FEEDBACK_VCL_NAL_IN_AU;
-
-/**
-* @brief Type of layer being encoded
-*/
-typedef enum {
-  NON_VIDEO_CODING_LAYER = 0,
-  VIDEO_CODING_LAYER = 1
-} LAYER_TYPE;
-
-/**
-* @brief Spatial layer num
-*/
-typedef enum {
-  SPATIAL_LAYER_0 = 0,
-  SPATIAL_LAYER_1 = 1,
-  SPATIAL_LAYER_2 = 2,
-  SPATIAL_LAYER_3 = 3,
-  SPATIAL_LAYER_ALL = 4
-} LAYER_NUM;
-
-/**
-* @brief Enumerate the type of video bitstream which is provided to decoder
-*/
-typedef enum {
-  VIDEO_BITSTREAM_AVC               = 0,
-  VIDEO_BITSTREAM_SVC               = 1,
-  VIDEO_BITSTREAM_DEFAULT           = VIDEO_BITSTREAM_SVC
-} VIDEO_BITSTREAM_TYPE;
-
-/**
-* @brief Enumerate the type of key frame request
-*/
-typedef enum {
-  NO_RECOVERY_REQUSET  = 0,
-  LTR_RECOVERY_REQUEST = 1,
-  IDR_RECOVERY_REQUEST = 2,
-  NO_LTR_MARKING_FEEDBACK = 3,
-  LTR_MARKING_SUCCESS = 4,
-  LTR_MARKING_FAILED = 5
-} KEY_FRAME_REQUEST_TYPE;
-
-/**
-* @brief Structure for LTR recover request
-*/
-typedef struct {
-  unsigned int uiFeedbackType;       ///< IDR request or LTR recovery request
-  unsigned int uiIDRPicId;           ///< distinguish request from different IDR
-  int          iLastCorrectFrameNum;
-  int          iCurrentFrameNum;     ///< specify current decoder frame_num.
-  int          iLayerId;           //specify the layer for recovery request
-} SLTRRecoverRequest;
-
-/**
-* @brief Structure for LTR marking feedback
-*/
-typedef struct {
-  unsigned int  uiFeedbackType; ///< mark failed or successful
-  unsigned int  uiIDRPicId;     ///< distinguish request from different IDR
-  int           iLTRFrameNum;   ///< specify current decoder frame_num
-  int           iLayerId;        //specify the layer for LTR marking feedback
-} SLTRMarkingFeedback;
-
-/**
-* @brief Structure for LTR configuration
-*/
-typedef struct {
-  bool   bEnableLongTermReference; ///< 1: on, 0: off
-  int    iLTRRefNum;               ///< TODO: not supported to set it arbitrary yet
-} SLTRConfig;
-
-/**
-* @brief Enumerate the type of rate control mode
-*/
-typedef enum {
-  RC_QUALITY_MODE = 0,     ///< quality mode
-  RC_BITRATE_MODE = 1,     ///< bitrate mode
-  RC_BUFFERBASED_MODE = 2, ///< no bitrate control,only using buffer status,adjust the video quality
-  RC_TIMESTAMP_MODE = 3, //rate control based timestamp
-  RC_BITRATE_MODE_POST_SKIP = 4, ///< this is in-building RC MODE, WILL BE DELETED after algorithm tuning!
-  RC_OFF_MODE = -1,         ///< rate control off mode
-} RC_MODES;
-
-/**
-* @brief Enumerate the type of profile id
-*/
-typedef enum {
-  PRO_UNKNOWN   = 0,
-  PRO_BASELINE  = 66,
-  PRO_MAIN      = 77,
-  PRO_EXTENDED  = 88,
-  PRO_HIGH      = 100,
-  PRO_HIGH10    = 110,
-  PRO_HIGH422   = 122,
-  PRO_HIGH444   = 144,
-  PRO_CAVLC444  = 244,
-
-  PRO_SCALABLE_BASELINE = 83,
-  PRO_SCALABLE_HIGH     = 86
-} EProfileIdc;
-
-/**
-* @brief Enumerate the type of level id
-*/
-typedef enum {
-  LEVEL_UNKNOWN = 0,
-  LEVEL_1_0 = 10,
-  LEVEL_1_B = 9,
-  LEVEL_1_1 = 11,
-  LEVEL_1_2 = 12,
-  LEVEL_1_3 = 13,
-  LEVEL_2_0 = 20,
-  LEVEL_2_1 = 21,
-  LEVEL_2_2 = 22,
-  LEVEL_3_0 = 30,
-  LEVEL_3_1 = 31,
-  LEVEL_3_2 = 32,
-  LEVEL_4_0 = 40,
-  LEVEL_4_1 = 41,
-  LEVEL_4_2 = 42,
-  LEVEL_5_0 = 50,
-  LEVEL_5_1 = 51,
-  LEVEL_5_2 = 52
-} ELevelIdc;
-
-/**
-* @brief Enumerate the type of wels log
-*/
-enum {
-  WELS_LOG_QUIET       = 0x00,          ///< quiet mode
-  WELS_LOG_ERROR       = 1 << 0,        ///< error log iLevel
-  WELS_LOG_WARNING     = 1 << 1,        ///< Warning log iLevel
-  WELS_LOG_INFO        = 1 << 2,        ///< information log iLevel
-  WELS_LOG_DEBUG       = 1 << 3,        ///< debug log, critical algo log
-  WELS_LOG_DETAIL      = 1 << 4,        ///< per packet/frame log
-  WELS_LOG_RESV        = 1 << 5,        ///< resversed log iLevel
-  WELS_LOG_LEVEL_COUNT = 6,
-  WELS_LOG_DEFAULT     = WELS_LOG_WARNING   ///< default log iLevel in Wels codec
-};
-
-/**
- * @brief Enumerate the type of slice mode
- */
-typedef enum {
-  SM_SINGLE_SLICE         = 0, ///< | SliceNum==1
-  SM_FIXEDSLCNUM_SLICE    = 1, ///< | according to SliceNum        | enabled dynamic slicing for multi-thread
-  SM_RASTER_SLICE         = 2, ///< | according to SlicesAssign    | need input of MB numbers each slice. In addition, if other constraint in SSliceArgument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
-  SM_SIZELIMITED_SLICE           = 3, ///< | according to SliceSize       | slicing according to size, the slicing will be dynamic(have no idea about slice_nums until encoding current frame)
-  SM_RESERVED             = 4
-} SliceModeEnum;
-
-/**
- * @brief Structure for slice argument
- */
-typedef struct {
-  SliceModeEnum uiSliceMode;    ///< by default, uiSliceMode will be SM_SINGLE_SLICE
-  unsigned int  uiSliceNum;     ///< only used when uiSliceMode=1, when uiSliceNum=0 means auto design it with cpu core number
-  unsigned int  uiSliceMbNum[MAX_SLICES_NUM_TMP]; ///< only used when uiSliceMode=2; when =0 means setting one MB row a slice
-  unsigned int  uiSliceSizeConstraint; ///< now only used when uiSliceMode=4
-} SSliceArgument;
-
-/**
-* @brief Enumerate the type of video format
-*/
-typedef enum {
-  VF_COMPONENT,
-  VF_PAL,
-  VF_NTSC,
-  VF_SECAM,
-  VF_MAC,
-  VF_UNDEF,
-  VF_NUM_ENUM
-} EVideoFormatSPS;	// EVideoFormat is already defined/used elsewhere!
-
-/**
-* @brief Enumerate the type of color primaries
-*/
-typedef enum {
-  CP_RESERVED0,
-  CP_BT709,
-  CP_UNDEF,
-  CP_RESERVED3,
-  CP_BT470M,
-  CP_BT470BG,
-  CP_SMPTE170M,
-  CP_SMPTE240M,
-  CP_FILM,
-  CP_BT2020,
-  CP_NUM_ENUM
-} EColorPrimaries;
-
-/**
-* @brief Enumerate the type of transfer characteristics
-*/
-typedef enum {
-  TRC_RESERVED0,
-  TRC_BT709,
-  TRC_UNDEF,
-  TRC_RESERVED3,
-  TRC_BT470M,
-  TRC_BT470BG,
-  TRC_SMPTE170M,
-  TRC_SMPTE240M,
-  TRC_LINEAR,
-  TRC_LOG100,
-  TRC_LOG316,
-  TRC_IEC61966_2_4,
-  TRC_BT1361E,
-  TRC_IEC61966_2_1,
-  TRC_BT2020_10,
-  TRC_BT2020_12,
-  TRC_NUM_ENUM
-} ETransferCharacteristics;
-
-/**
-* @brief Enumerate the type of color matrix
-*/
-typedef enum {
-  CM_GBR,
-  CM_BT709,
-  CM_UNDEF,
-  CM_RESERVED3,
-  CM_FCC,
-  CM_BT470BG,
-  CM_SMPTE170M,
-  CM_SMPTE240M,
-  CM_YCGCO,
-  CM_BT2020NC,
-  CM_BT2020C,
-  CM_NUM_ENUM
-} EColorMatrix;
-
-
-/**
-* @brief Enumerate the type of sample aspect ratio
-*/
-typedef enum {
-  ASP_UNSPECIFIED = 0,
-  ASP_1x1 = 1,
-  ASP_12x11 = 2,
-  ASP_10x11 = 3,
-  ASP_16x11 = 4,
-  ASP_40x33 = 5,
-  ASP_24x11 = 6,
-  ASP_20x11 = 7,
-  ASP_32x11 = 8,
-  ASP_80x33 = 9,
-  ASP_18x11 = 10,
-  ASP_15x11 = 11,
-  ASP_64x33 = 12,
-  ASP_160x99 = 13,
-
-  ASP_EXT_SAR = 255
-} ESampleAspectRatio;
-
-
-/**
-* @brief  Structure for spatial layer configuration
-*/
-typedef struct {
-  int   iVideoWidth;           ///< width of picture in luminance samples of a layer
-  int   iVideoHeight;          ///< height of picture in luminance samples of a layer
-  float fFrameRate;            ///< frame rate specified for a layer
-  int   iSpatialBitrate;       ///< target bitrate for a spatial layer, in unit of bps
-  int   iMaxSpatialBitrate;    ///< maximum  bitrate for a spatial layer, in unit of bps
-  EProfileIdc  uiProfileIdc;   ///< value of profile IDC (PRO_UNKNOWN for auto-detection)
-  ELevelIdc    uiLevelIdc;     ///< value of profile IDC (0 for auto-detection)
-  int          iDLayerQp;      ///< value of level IDC (0 for auto-detection)
-
-  SSliceArgument sSliceArgument;
-
-  // Note: members bVideoSignalTypePresent through uiColorMatrix below are also defined in SWelsSPS in parameter_sets.h.
-  bool			bVideoSignalTypePresent;	// false => do not write any of the following information to the header
-  unsigned char	uiVideoFormat;				// EVideoFormatSPS; 3 bits in header; 0-5 => component, kpal, ntsc, secam, mac, undef
-  bool			bFullRange;					// false => analog video data range [16, 235]; true => full data range [0,255]
-  bool			bColorDescriptionPresent;	// false => do not write any of the following three items to the header
-  unsigned char	uiColorPrimaries;			// EColorPrimaries; 8 bits in header; 0 - 9 => ???, bt709, undef, ???, bt470m, bt470bg,
-                                            //    smpte170m, smpte240m, film, bt2020
-  unsigned char	uiTransferCharacteristics;	// ETransferCharacteristics; 8 bits in header; 0 - 15 => ???, bt709, undef, ???, bt470m, bt470bg, smpte170m,
-										    //   smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1, bt2020-10, bt2020-12
-  unsigned char	uiColorMatrix;				// EColorMatrix; 8 bits in header (corresponds to FFmpeg "colorspace"); 0 - 10 => GBR, bt709,
-										    //   undef, ???, fcc, bt470bg, smpte170m, smpte240m, YCgCo, bt2020nc, bt2020c
-
-  bool bAspectRatioPresent; ///< aspect ratio present in VUI
-  ESampleAspectRatio eAspectRatio; ///< aspect ratio idc
-  unsigned short sAspectRatioExtWidth; ///< use if aspect ratio idc == 255
-  unsigned short sAspectRatioExtHeight; ///< use if aspect ratio idc == 255
-
-} SSpatialLayerConfig;
-
-/**
-* @brief Encoder usage type
-*/
-typedef enum {
-  CAMERA_VIDEO_REAL_TIME,      ///< camera video for real-time communication
-  SCREEN_CONTENT_REAL_TIME,    ///< screen content signal
-  CAMERA_VIDEO_NON_REAL_TIME,
-  SCREEN_CONTENT_NON_REAL_TIME,
-  INPUT_CONTENT_TYPE_ALL,
-} EUsageType;
-
-/**
-* @brief Enumulate the complexity mode
-*/
-typedef enum {
-  LOW_COMPLEXITY = 0 ,             ///< the lowest compleixty,the fastest speed,
-  MEDIUM_COMPLEXITY,          ///< medium complexity, medium speed,medium quality
-  HIGH_COMPLEXITY             ///< high complexity, lowest speed, high quality
-} ECOMPLEXITY_MODE;
-
-/**
- * @brief Enumulate for the stategy of SPS/PPS strategy
- */
-typedef enum {
-  CONSTANT_ID = 0,           ///< constant id in SPS/PPS
-  INCREASING_ID = 0x01,      ///< SPS/PPS id increases at each IDR
-  SPS_LISTING  = 0x02,       ///< using SPS in the existing list if possible
-  SPS_LISTING_AND_PPS_INCREASING  = 0x03,
-  SPS_PPS_LISTING  = 0x06,
-} EParameterSetStrategy;
-
-// TODO:  Refine the parameters definition.
-/**
-* @brief SVC Encoding Parameters
-*/
-typedef struct TagEncParamBase {
-  EUsageType
-  iUsageType;                 ///< application type; please refer to the definition of EUsageType
-
-  int       iPicWidth;        ///< width of picture in luminance samples (the maximum of all layers if multiple spatial layers presents)
-  int       iPicHeight;       ///< height of picture in luminance samples((the maximum of all layers if multiple spatial layers presents)
-  int       iTargetBitrate;   ///< target bitrate desired, in unit of bps
-  RC_MODES  iRCMode;          ///< rate control mode
-  float     fMaxFrameRate;    ///< maximal input frame rate
-
-} SEncParamBase, *PEncParamBase;
-
-/**
-* @brief SVC Encoding Parameters extention
-*/
-typedef struct TagEncParamExt {
-  EUsageType
-  iUsageType;                          ///< same as in TagEncParamBase
-
-  int       iPicWidth;                 ///< same as in TagEncParamBase
-  int       iPicHeight;                ///< same as in TagEncParamBase
-  int       iTargetBitrate;            ///< same as in TagEncParamBase
-  RC_MODES  iRCMode;                   ///< same as in TagEncParamBase
-  float     fMaxFrameRate;             ///< same as in TagEncParamBase
-
-  int       iTemporalLayerNum;         ///< temporal layer number, max temporal layer = 4
-  int       iSpatialLayerNum;          ///< spatial layer number,1<= iSpatialLayerNum <= MAX_SPATIAL_LAYER_NUM, MAX_SPATIAL_LAYER_NUM = 4
-  SSpatialLayerConfig sSpatialLayers[MAX_SPATIAL_LAYER_NUM];
-
-  ECOMPLEXITY_MODE iComplexityMode;
-  unsigned int      uiIntraPeriod;     ///< period of Intra frame
-  int               iNumRefFrame;      ///< number of reference frame used
-  EParameterSetStrategy
-  eSpsPpsIdStrategy;       ///< different stategy in adjust ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
-  bool    bPrefixNalAddingCtrl;        ///< false:not use Prefix NAL; true: use Prefix NAL
-  bool    bEnableSSEI;                 ///< false:not use SSEI; true: use SSEI -- TODO: planning to remove the interface of SSEI
-  bool    bSimulcastAVC;               ///< (when encoding more than 1 spatial layer) false: use SVC syntax for higher layers; true: use Simulcast AVC
-  int     iPaddingFlag;                ///< 0:disable padding;1:padding
-  int     iEntropyCodingModeFlag;      ///< 0:CAVLC  1:CABAC.
-
-  /* rc control */
-  bool    bEnableFrameSkip;            ///< False: don't skip frame even if VBV buffer overflow.True: allow skipping frames to keep the bitrate within limits
-  int     iMaxBitrate;                 ///< the maximum bitrate, in unit of bps, set it to UNSPECIFIED_BIT_RATE if not needed
-  int     iMaxQp;                      ///< the maximum QP encoder supports
-  int     iMinQp;                      ///< the minmum QP encoder supports
-  unsigned int uiMaxNalSize;           ///< the maximum NAL size.  This value should be not 0 for dynamic slice mode
-
-  /*LTR settings*/
-  bool     bEnableLongTermReference;   ///< 1: on, 0: off
-  int      iLTRRefNum;                 ///< the number of LTR(long term reference),TODO: not supported to set it arbitrary yet
-  unsigned int      iLtrMarkPeriod;    ///< the LTR marked period that is used in feedback.
-  /* multi-thread settings*/
-  unsigned short
-  iMultipleThreadIdc;                  ///< 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; lager than 1: count number of threads;
-  bool  bUseLoadBalancing; ///< only used when uiSliceMode=1 or 3, will change slicing of a picture during the run-time of multi-thread encoding, so the result of each run may be different
-
-  /* Deblocking loop filter */
-  int       iLoopFilterDisableIdc;     ///< 0: on, 1: off, 2: on except for slice boundaries
-  int       iLoopFilterAlphaC0Offset;  ///< AlphaOffset: valid range [-6, 6], default 0
-  int       iLoopFilterBetaOffset;     ///< BetaOffset: valid range [-6, 6], default 0
-  /*pre-processing feature*/
-  bool    bEnableDenoise;              ///< denoise control
-  bool    bEnableBackgroundDetection;  ///< background detection control //VAA_BACKGROUND_DETECTION //BGD cmd
-  bool    bEnableAdaptiveQuant;        ///< adaptive quantization control
-  bool    bEnableFrameCroppingFlag;    ///< enable frame cropping flag: TRUE always in application
-  bool    bEnableSceneChangeDetect;
-
-  bool    bIsLosslessLink;            ///<  LTR advanced setting
-} SEncParamExt;
-
-/**
-* @brief Define a new struct to show the property of video bitstream.
-*/
-typedef struct {
-  unsigned int          size;          ///< size of the struct
-  VIDEO_BITSTREAM_TYPE  eVideoBsType;  ///< video stream type (AVC/SVC)
-} SVideoProperty;
-
-/**
-* @brief SVC Decoding Parameters, reserved here and potential applicable in the future
-*/
-typedef struct TagSVCDecodingParam {
-  char*     pFileNameRestructed;       ///< file name of reconstructed frame used for PSNR calculation based debug
-
-  unsigned int  uiCpuLoad;             ///< CPU load
-  unsigned char uiTargetDqLayer;       ///< setting target dq layer id
-
-  ERROR_CON_IDC eEcActiveIdc;          ///< whether active error concealment feature in decoder
-  bool bParseOnly;                     ///< decoder for parse only, no reconstruction. When it is true, SPS/PPS size should not exceed SPS_PPS_BS_SIZE (128). Otherwise, it will return error info
-
-  SVideoProperty   sVideoProperty;    ///< video stream property
-} SDecodingParam, *PDecodingParam;
-
-/**
-* @brief Bitstream inforamtion of a layer being encoded
-*/
-typedef struct {
-  unsigned char uiTemporalId;
-  unsigned char uiSpatialId;
-  unsigned char uiQualityId;
-  EVideoFrameType eFrameType;
-  unsigned char uiLayerType;
-
-  /**
-   * The sub sequence layers are ordered hierarchically based on their dependency on each other so that any picture in a layer shall not be
-   * predicted from any picture on any higher layer.
-  */
-  int   iSubSeqId;                ///< refer to D.2.11 Sub-sequence information SEI message semantics
-  int   iNalCount;              ///< count number of NAL coded already
-  int*  pNalLengthInByte;       ///< length of NAL size in byte from 0 to iNalCount-1
-  unsigned char*  pBsBuf;       ///< buffer of bitstream contained
-} SLayerBSInfo, *PLayerBSInfo;
-
-/**
-* @brief Frame bit stream info
-*/
-typedef struct {
-  int           iLayerNum;
-  SLayerBSInfo  sLayerInfo[MAX_LAYER_NUM_OF_FRAME];
-
-  EVideoFrameType eFrameType;
-  int iFrameSizeInBytes;
-  long long uiTimeStamp;
-} SFrameBSInfo, *PFrameBSInfo;
-
-/**
-*  @brief Structure for source picture
-*/
-typedef struct Source_Picture_s {
-  int       iColorFormat;          ///< color space type
-  int       iStride[4];            ///< stride for each plane pData
-  unsigned char*  pData[4];        ///< plane pData
-  int       iPicWidth;             ///< luma picture width in x coordinate
-  int       iPicHeight;            ///< luma picture height in y coordinate
-  long long uiTimeStamp;           ///< timestamp of the source picture, unit: millisecond
-} SSourcePicture;
-/**
-* @brief Structure for bit rate info
-*/
-typedef struct TagBitrateInfo {
-  LAYER_NUM iLayer;
-  int iBitrate;                    ///< the maximum bitrate
-} SBitrateInfo;
-
-/**
-* @brief Structure for dump layer info
-*/
-typedef struct TagDumpLayer {
-  int iLayer;
-  char* pFileName;
-} SDumpLayer;
-
-/**
-* @brief Structure for profile info in layer
-*
-*/
-typedef struct TagProfileInfo {
-  int iLayer;
-  EProfileIdc uiProfileIdc;        ///< the profile info
-} SProfileInfo;
-
-/**
-* @brief  Structure for level info in layer
-*
-*/
-typedef struct TagLevelInfo {
-  int iLayer;
-  ELevelIdc uiLevelIdc;            ///< the level info
-} SLevelInfo;
-/**
-* @brief Structure for dilivery status
-*
-*/
-typedef struct TagDeliveryStatus {
-  bool bDeliveryFlag;              ///< 0: the previous frame isn't delivered,1: the previous frame is delivered
-  int iDropFrameType;              ///< the frame type that is dropped; reserved
-  int iDropFrameSize;              ///< the frame size that is dropped; reserved
-} SDeliveryStatus;
-
-/**
-* @brief The capability of decoder, for SDP negotiation
-*/
-typedef struct TagDecoderCapability {
-  int iProfileIdc;     ///< profile_idc
-  int iProfileIop;     ///< profile-iop
-  int iLevelIdc;       ///< level_idc
-  int iMaxMbps;        ///< max-mbps
-  int iMaxFs;          ///< max-fs
-  int iMaxCpb;         ///< max-cpb
-  int iMaxDpb;         ///< max-dpb
-  int iMaxBr;          ///< max-br
-  bool bRedPicCap;     ///< redundant-pic-cap
-} SDecoderCapability;
-
-/**
-* @brief Structure for parse only output
-*/
-typedef struct TagParserBsInfo {
-  int iNalNum;                                 ///< total NAL number in current AU
-  int *pNalLenInByte;  ///< each nal length
-  unsigned char* pDstBuff;                     ///< outputted dst buffer for parsed bitstream
-  int iSpsWidthInPixel;                        ///< required SPS width info
-  int iSpsHeightInPixel;                       ///< required SPS height info
-  unsigned long long uiInBsTimeStamp;               ///< input BS timestamp
-  unsigned long long uiOutBsTimeStamp;             ///< output BS timestamp
-} SParserBsInfo, *PParserBsInfo;
-
-/**
-* @brief Structure for encoder statistics
-*/
-typedef struct TagVideoEncoderStatistics {
-  unsigned int uiWidth;                        ///< the width of encoded frame
-  unsigned int uiHeight;                       ///< the height of encoded frame
-  //following standard, will be 16x aligned, if there are multiple spatial, this is of the highest
-  float fAverageFrameSpeedInMs;                ///< average_Encoding_Time
-
-  // rate control related
-  float fAverageFrameRate;                     ///< the average frame rate in, calculate since encoding starts, supposed that the input timestamp is in unit of ms
-  float fLatestFrameRate;                      ///< the frame rate in, in the last second, supposed that the input timestamp is in unit of ms (? useful for checking BR, but is it easy to calculate?
-  unsigned int uiBitRate;                      ///< sendrate in Bits per second, calculated within the set time-window
-  unsigned int uiAverageFrameQP;                    ///< the average QP of last encoded frame
-
-  unsigned int uiInputFrameCount;              ///< number of frames
-  unsigned int uiSkippedFrameCount;            ///< number of frames
-
-  unsigned int uiResolutionChangeTimes;        ///< uiResolutionChangeTimes
-  unsigned int uiIDRReqNum;                    ///< number of IDR requests
-  unsigned int uiIDRSentNum;                   ///< number of actual IDRs sent
-  unsigned int uiLTRSentNum;                   ///< number of LTR sent/marked
-
-  long long    iStatisticsTs;                  ///< Timestamp of updating the statistics
-
-  unsigned long iTotalEncodedBytes;
-  unsigned long iLastStatisticsBytes;
-  unsigned long iLastStatisticsFrameCount;
-} SEncoderStatistics;
-
-/**
-* @brief  Structure for decoder statistics
-*/
-typedef struct TagVideoDecoderStatistics {
-  unsigned int uiWidth;                        ///< the width of encode/decode frame
-  unsigned int uiHeight;                       ///< the height of encode/decode frame
-  float fAverageFrameSpeedInMs;                ///< average_Decoding_Time
-  float fActualAverageFrameSpeedInMs;          ///< actual average_Decoding_Time, including freezing pictures
-  unsigned int uiDecodedFrameCount;            ///< number of frames
-  unsigned int uiResolutionChangeTimes;        ///< uiResolutionChangeTimes
-  unsigned int uiIDRCorrectNum;                ///< number of correct IDR received
-  //EC on related
-  unsigned int
-  uiAvgEcRatio;                                ///< when EC is on, the average ratio of total EC areas, can be an indicator of reconstruction quality
-  unsigned int
-  uiAvgEcPropRatio;                            ///< when EC is on, the rough average ratio of propogate EC areas, can be an indicator of reconstruction quality
-  unsigned int uiEcIDRNum;                     ///< number of actual unintegrity IDR or not received but eced
-  unsigned int uiEcFrameNum;                   ///<
-  unsigned int uiIDRLostNum;                   ///< number of whole lost IDR
-  unsigned int uiFreezingIDRNum;               ///< number of freezing IDR with error (partly received), under resolution change
-  unsigned int uiFreezingNonIDRNum;            ///< number of freezing non-IDR with error
-  int iAvgLumaQp;                              ///< average luma QP. default: -1, no correct frame outputted
-  int iSpsReportErrorNum;                      ///< number of Sps Invalid report
-  int iSubSpsReportErrorNum;                   ///< number of SubSps Invalid report
-  int iPpsReportErrorNum;                      ///< number of Pps Invalid report
-  int iSpsNoExistNalNum;                       ///< number of Sps NoExist Nal
-  int iSubSpsNoExistNalNum;                    ///< number of SubSps NoExist Nal
-  int iPpsNoExistNalNum;                       ///< number of Pps NoExist Nal
-
-  unsigned int uiProfile;                ///< Profile idc in syntax
-  unsigned int uiLevel;                  ///< level idc according to Annex A-1
-
-  int iCurrentActiveSpsId;                     ///< current active SPS id
-  int iCurrentActivePpsId;                     ///< current active PPS id
-
-  unsigned int iStatisticsLogInterval;                  ///< frame interval of statistics log
-} SDecoderStatistics; // in building, coming soon
-
-/**
-* @brief Structure for sample aspect ratio (SAR) info in VUI
-*/
-typedef struct TagVuiSarInfo {
-  unsigned int uiSarWidth;                     ///< SAR width
-  unsigned int uiSarHeight;                    ///< SAR height
-  bool bOverscanAppropriateFlag;               ///< SAR overscan flag
-} SVuiSarInfo, *PVuiSarInfo;
-
-#endif//WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+
+
+#ifndef WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
+#define WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
+/**
+  * @file  codec_app_def.h
+  * @brief Data and /or structures introduced in Cisco OpenH264 application
+*/
+
+#include "codec_def.h"
+/* Constants */
+#define MAX_TEMPORAL_LAYER_NUM          4
+#define MAX_SPATIAL_LAYER_NUM           4
+#define MAX_QUALITY_LAYER_NUM           4
+
+#define MAX_LAYER_NUM_OF_FRAME          128
+#define MAX_NAL_UNITS_IN_LAYER          128     ///< predetermined here, adjust it later if need
+
+#define MAX_RTP_PAYLOAD_LEN             1000
+#define AVERAGE_RTP_PAYLOAD_LEN         800
+
+
+#define SAVED_NALUNIT_NUM_TMP           ( (MAX_SPATIAL_LAYER_NUM*MAX_QUALITY_LAYER_NUM) + 1 + MAX_SPATIAL_LAYER_NUM )  ///< SPS/PPS + SEI/SSEI + PADDING_NAL
+#define MAX_SLICES_NUM_TMP              ( ( MAX_NAL_UNITS_IN_LAYER - SAVED_NALUNIT_NUM_TMP ) / 3 )
+
+
+#define AUTO_REF_PIC_COUNT  -1          ///< encoder selects the number of reference frame automatically
+#define UNSPECIFIED_BIT_RATE 0          ///< to do: add detail comment
+
+/**
+ * @brief Struct of OpenH264 version
+ */
+///
+/// E.g. SDK version is 1.2.0.0, major version number is 1, minor version number is 2, and revision number is 0.
+typedef struct  _tagVersion {
+  unsigned int uMajor;                  ///< The major version number
+  unsigned int uMinor;                  ///< The minor version number
+  unsigned int uRevision;               ///< The revision number
+  unsigned int uReserved;               ///< The reserved number, it should be 0.
+} OpenH264Version;
+
+/**
+* @brief Decoding status
+*/
+typedef enum {
+  /**
+  * Errors derived from bitstream parsing
+  */
+  dsErrorFree           = 0x00,   ///< bit stream error-free
+  dsFramePending        = 0x01,   ///< need more throughput to generate a frame output,
+  dsRefLost             = 0x02,   ///< layer lost at reference frame with temporal id 0
+  dsBitstreamError      = 0x04,   ///< error bitstreams(maybe broken internal frame) the decoder cared
+  dsDepLayerLost        = 0x08,   ///< a dependent layer has been lost
+  dsNoParamSets         = 0x10,   ///< no parameter set NALs involved
+  dsDataErrorConcealed  = 0x20,   ///< current data error concealed specified
+
+  /**
+  * Errors derived from logic level
+  */
+  dsInvalidArgument     = 0x1000, ///< invalid argument specified
+  dsInitialOptExpected  = 0x2000, ///< initializing operation is expected
+  dsOutOfMemory         = 0x4000, ///< out of memory due to new request
+  /**
+  * ANY OTHERS?
+  */
+  dsDstBufNeedExpan     = 0x8000  ///< actual picture size exceeds size of dst pBuffer feed in decoder, so need expand its size
+
+} DECODING_STATE;
+
+/**
+* @brief Option types introduced in SVC encoder application
+*/
+typedef enum {
+  ENCODER_OPTION_DATAFORMAT = 0,
+  ENCODER_OPTION_IDR_INTERVAL,               ///< IDR period,0/-1 means no Intra period (only the first frame); larger than 0 means the desired IDR period, must be multiple of (2^temporal_layer)
+  ENCODER_OPTION_SVC_ENCODE_PARAM_BASE,      ///< structure of Base Param
+  ENCODER_OPTION_SVC_ENCODE_PARAM_EXT,       ///< structure of Extension Param
+  ENCODER_OPTION_FRAME_RATE,                 ///< maximal input frame rate, current supported range: MAX_FRAME_RATE = 30,MIN_FRAME_RATE = 1
+  ENCODER_OPTION_BITRATE,
+  ENCODER_OPTION_MAX_BITRATE,
+  ENCODER_OPTION_INTER_SPATIAL_PRED,
+  ENCODER_OPTION_RC_MODE,
+  ENCODER_OPTION_RC_FRAME_SKIP,
+  ENCODER_PADDING_PADDING,                   ///< 0:disable padding;1:padding
+
+  ENCODER_OPTION_PROFILE,                    ///< assign the profile for each layer
+  ENCODER_OPTION_LEVEL,                      ///< assign the level for each layer
+  ENCODER_OPTION_NUMBER_REF,                 ///< the number of reference frames
+  ENCODER_OPTION_DELIVERY_STATUS,            ///< the delivery info which is a feedback from app level
+
+  ENCODER_LTR_RECOVERY_REQUEST,
+  ENCODER_LTR_MARKING_FEEDBACK,
+  ENCODER_LTR_MARKING_PERIOD,
+  ENCODER_OPTION_LTR,                        ///< 0:disable LTR;larger than 0 enable LTR; LTR number is fixed to be 2 in current encoder
+  ENCODER_OPTION_COMPLEXITY,
+
+  ENCODER_OPTION_ENABLE_SSEI,                ///< enable SSEI: true--enable ssei; false--disable ssei
+  ENCODER_OPTION_ENABLE_PREFIX_NAL_ADDING,   ///< enable prefix: true--enable prefix; false--disable prefix
+  ENCODER_OPTION_SPS_PPS_ID_STRATEGY, ///< strategy for adjusting the ID in SPS/PPS: 0- constant ID, 1-additional ID, 6-mapping and additional
+
+  ENCODER_OPTION_CURRENT_PATH,
+  ENCODER_OPTION_DUMP_FILE,                  ///< dump layer reconstruct frame to a specified file
+  ENCODER_OPTION_TRACE_LEVEL,                ///< trace info based on the trace level
+  ENCODER_OPTION_TRACE_CALLBACK,             ///< a void (*)(void* context, int level, const char* message) function which receives log messages
+  ENCODER_OPTION_TRACE_CALLBACK_CONTEXT,     ///< context info of trace callback
+
+  ENCODER_OPTION_GET_STATISTICS,             ///< read only
+  ENCODER_OPTION_STATISTICS_LOG_INTERVAL,    ///< log interval in millisecond
+
+  ENCODER_OPTION_IS_LOSSLESS_LINK,            ///< advanced algorithmic settings
+
+  ENCODER_OPTION_BITS_VARY_PERCENTAGE        ///< bit vary percentage
+} ENCODER_OPTION;
+
+/**
+* @brief Option types introduced in decoder application
+*/
+typedef enum {
+  DECODER_OPTION_END_OF_STREAM = 1,     ///< end of stream flag
+  DECODER_OPTION_VCL_NAL,               ///< feedback whether or not have VCL NAL in current AU for application layer
+  DECODER_OPTION_TEMPORAL_ID,           ///< feedback temporal id for application layer
+  DECODER_OPTION_FRAME_NUM,             ///< feedback current decoded frame number
+  DECODER_OPTION_IDR_PIC_ID,            ///< feedback current frame belong to which IDR period
+  DECODER_OPTION_LTR_MARKING_FLAG,      ///< feedback whether current frame marks an LTR
+  DECODER_OPTION_LTR_MARKED_FRAME_NUM,  ///< feedback frame num marked by current Frame
+  DECODER_OPTION_ERROR_CON_IDC,         ///< indicate decoder error concealment method
+  DECODER_OPTION_TRACE_LEVEL,
+  DECODER_OPTION_TRACE_CALLBACK,        ///< a void (*)(void* context, int level, const char* message) function which receives log messages
+  DECODER_OPTION_TRACE_CALLBACK_CONTEXT,///< context info of trace callback
+
+  DECODER_OPTION_GET_STATISTICS,        ///< feedback decoder statistics
+  DECODER_OPTION_GET_SAR_INFO,          ///< feedback decoder Sample Aspect Ratio info in Vui
+  DECODER_OPTION_PROFILE,               ///< get current AU profile info, only is used in GetOption
+  DECODER_OPTION_LEVEL,                 ///< get current AU level info,only is used in GetOption
+  DECODER_OPTION_STATISTICS_LOG_INTERVAL,///< set log output interval
+  DECODER_OPTION_IS_REF_PIC,             ///< feedback current frame is ref pic or not
+  DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER  ///< number of frames remaining in decoder buffer when pictures are required to be re-ordered into display-order.
+
+} DECODER_OPTION;
+
+/**
+* @brief Enumerate the type of error concealment methods
+*/
+typedef enum {
+  ERROR_CON_DISABLE = 0,
+  ERROR_CON_FRAME_COPY,
+  ERROR_CON_SLICE_COPY,
+  ERROR_CON_FRAME_COPY_CROSS_IDR,
+  ERROR_CON_SLICE_COPY_CROSS_IDR,
+  ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE,
+  ERROR_CON_SLICE_MV_COPY_CROSS_IDR,
+  ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE
+} ERROR_CON_IDC;
+/**
+* @brief Feedback that whether or not have VCL NAL in current AU
+*/
+typedef enum {
+  FEEDBACK_NON_VCL_NAL = 0,
+  FEEDBACK_VCL_NAL,
+  FEEDBACK_UNKNOWN_NAL
+} FEEDBACK_VCL_NAL_IN_AU;
+
+/**
+* @brief Type of layer being encoded
+*/
+typedef enum {
+  NON_VIDEO_CODING_LAYER = 0,
+  VIDEO_CODING_LAYER = 1
+} LAYER_TYPE;
+
+/**
+* @brief Spatial layer num
+*/
+typedef enum {
+  SPATIAL_LAYER_0 = 0,
+  SPATIAL_LAYER_1 = 1,
+  SPATIAL_LAYER_2 = 2,
+  SPATIAL_LAYER_3 = 3,
+  SPATIAL_LAYER_ALL = 4
+} LAYER_NUM;
+
+/**
+* @brief Enumerate the type of video bitstream which is provided to decoder
+*/
+typedef enum {
+  VIDEO_BITSTREAM_AVC               = 0,
+  VIDEO_BITSTREAM_SVC               = 1,
+  VIDEO_BITSTREAM_DEFAULT           = VIDEO_BITSTREAM_SVC
+} VIDEO_BITSTREAM_TYPE;
+
+/**
+* @brief Enumerate the type of key frame request
+*/
+typedef enum {
+  NO_RECOVERY_REQUSET  = 0,
+  LTR_RECOVERY_REQUEST = 1,
+  IDR_RECOVERY_REQUEST = 2,
+  NO_LTR_MARKING_FEEDBACK = 3,
+  LTR_MARKING_SUCCESS = 4,
+  LTR_MARKING_FAILED = 5
+} KEY_FRAME_REQUEST_TYPE;
+
+/**
+* @brief Structure for LTR recover request
+*/
+typedef struct {
+  unsigned int uiFeedbackType;       ///< IDR request or LTR recovery request
+  unsigned int uiIDRPicId;           ///< distinguish request from different IDR
+  int          iLastCorrectFrameNum;
+  int          iCurrentFrameNum;     ///< specify current decoder frame_num.
+  int          iLayerId;           //specify the layer for recovery request
+} SLTRRecoverRequest;
+
+/**
+* @brief Structure for LTR marking feedback
+*/
+typedef struct {
+  unsigned int  uiFeedbackType; ///< mark failed or successful
+  unsigned int  uiIDRPicId;     ///< distinguish request from different IDR
+  int           iLTRFrameNum;   ///< specify current decoder frame_num
+  int           iLayerId;        //specify the layer for LTR marking feedback
+} SLTRMarkingFeedback;
+
+/**
+* @brief Structure for LTR configuration
+*/
+typedef struct {
+  bool   bEnableLongTermReference; ///< 1: on, 0: off
+  int    iLTRRefNum;               ///< TODO: not supported to set it arbitrary yet
+} SLTRConfig;
+
+/**
+* @brief Enumerate the type of rate control mode
+*/
+typedef enum {
+  RC_QUALITY_MODE = 0,     ///< quality mode
+  RC_BITRATE_MODE = 1,     ///< bitrate mode
+  RC_BUFFERBASED_MODE = 2, ///< no bitrate control,only using buffer status,adjust the video quality
+  RC_TIMESTAMP_MODE = 3, //rate control based timestamp
+  RC_BITRATE_MODE_POST_SKIP = 4, ///< this is in-building RC MODE, WILL BE DELETED after algorithm tuning!
+  RC_OFF_MODE = -1,         ///< rate control off mode
+} RC_MODES;
+
+/**
+* @brief Enumerate the type of profile id
+*/
+typedef enum {
+  PRO_UNKNOWN   = 0,
+  PRO_BASELINE  = 66,
+  PRO_MAIN      = 77,
+  PRO_EXTENDED  = 88,
+  PRO_HIGH      = 100,
+  PRO_HIGH10    = 110,
+  PRO_HIGH422   = 122,
+  PRO_HIGH444   = 144,
+  PRO_CAVLC444  = 244,
+
+  PRO_SCALABLE_BASELINE = 83,
+  PRO_SCALABLE_HIGH     = 86
+} EProfileIdc;
+
+/**
+* @brief Enumerate the type of level id
+*/
+typedef enum {
+  LEVEL_UNKNOWN = 0,
+  LEVEL_1_0 = 10,
+  LEVEL_1_B = 9,
+  LEVEL_1_1 = 11,
+  LEVEL_1_2 = 12,
+  LEVEL_1_3 = 13,
+  LEVEL_2_0 = 20,
+  LEVEL_2_1 = 21,
+  LEVEL_2_2 = 22,
+  LEVEL_3_0 = 30,
+  LEVEL_3_1 = 31,
+  LEVEL_3_2 = 32,
+  LEVEL_4_0 = 40,
+  LEVEL_4_1 = 41,
+  LEVEL_4_2 = 42,
+  LEVEL_5_0 = 50,
+  LEVEL_5_1 = 51,
+  LEVEL_5_2 = 52
+} ELevelIdc;
+
+/**
+* @brief Enumerate the type of wels log
+*/
+enum {
+  WELS_LOG_QUIET       = 0x00,          ///< quiet mode
+  WELS_LOG_ERROR       = 1 << 0,        ///< error log iLevel
+  WELS_LOG_WARNING     = 1 << 1,        ///< Warning log iLevel
+  WELS_LOG_INFO        = 1 << 2,        ///< information log iLevel
+  WELS_LOG_DEBUG       = 1 << 3,        ///< debug log, critical algo log
+  WELS_LOG_DETAIL      = 1 << 4,        ///< per packet/frame log
+  WELS_LOG_RESV        = 1 << 5,        ///< reserved log iLevel
+  WELS_LOG_LEVEL_COUNT = 6,
+  WELS_LOG_DEFAULT     = WELS_LOG_WARNING   ///< default log iLevel in Wels codec
+};
+
+/**
+ * @brief Enumerate the type of slice mode
+ */
+typedef enum {
+  SM_SINGLE_SLICE         = 0, ///< | SliceNum==1
+  SM_FIXEDSLCNUM_SLICE    = 1, ///< | according to SliceNum        | enabled dynamic slicing for multi-thread
+  SM_RASTER_SLICE         = 2, ///< | according to SlicesAssign    | need input of MB numbers each slice. In addition, if other constraint in SSliceArgument is presented, need to follow the constraints. Typically if MB num and slice size are both constrained, re-encoding may be involved.
+  SM_SIZELIMITED_SLICE           = 3, ///< | according to SliceSize       | slicing according to size, the slicing will be dynamic(have no idea about slice_nums until encoding current frame)
+  SM_RESERVED             = 4
+} SliceModeEnum;
+
+/**
+ * @brief Structure for slice argument
+ */
+typedef struct {
+  SliceModeEnum uiSliceMode;    ///< by default, uiSliceMode will be SM_SINGLE_SLICE
+  unsigned int
+  uiSliceNum;     ///< only used when uiSliceMode=1, when uiSliceNum=0 means auto design it with cpu core number
+  unsigned int
+  uiSliceMbNum[MAX_SLICES_NUM_TMP]; ///< only used when uiSliceMode=2; when =0 means setting one MB row a slice
+  unsigned int  uiSliceSizeConstraint; ///< now only used when uiSliceMode=3 (SM_SIZELIMITED_SLICE)
+} SSliceArgument;
+
+/**
+* @brief Enumerate the type of video format
+*/
+typedef enum {
+  VF_COMPONENT,
+  VF_PAL,
+  VF_NTSC,
+  VF_SECAM,
+  VF_MAC,
+  VF_UNDEF,
+  VF_NUM_ENUM
+} EVideoFormatSPS;  // EVideoFormat is already defined/used elsewhere!
+
+/**
+* @brief Enumerate the type of color primaries
+*/
+typedef enum {
+  CP_RESERVED0,
+  CP_BT709,
+  CP_UNDEF,
+  CP_RESERVED3,
+  CP_BT470M,
+  CP_BT470BG,
+  CP_SMPTE170M,
+  CP_SMPTE240M,
+  CP_FILM,
+  CP_BT2020,
+  CP_NUM_ENUM
+} EColorPrimaries;
+
+/**
+* @brief Enumerate the type of transfer characteristics
+*/
+typedef enum {
+  TRC_RESERVED0,
+  TRC_BT709,
+  TRC_UNDEF,
+  TRC_RESERVED3,
+  TRC_BT470M,
+  TRC_BT470BG,
+  TRC_SMPTE170M,
+  TRC_SMPTE240M,
+  TRC_LINEAR,
+  TRC_LOG100,
+  TRC_LOG316,
+  TRC_IEC61966_2_4,
+  TRC_BT1361E,
+  TRC_IEC61966_2_1,
+  TRC_BT2020_10,
+  TRC_BT2020_12,
+  TRC_NUM_ENUM
+} ETransferCharacteristics;
+
+/**
+* @brief Enumerate the type of color matrix
+*/
+typedef enum {
+  CM_GBR,
+  CM_BT709,
+  CM_UNDEF,
+  CM_RESERVED3,
+  CM_FCC,
+  CM_BT470BG,
+  CM_SMPTE170M,
+  CM_SMPTE240M,
+  CM_YCGCO,
+  CM_BT2020NC,
+  CM_BT2020C,
+  CM_NUM_ENUM
+} EColorMatrix;
+
+
+/**
+* @brief Enumerate the type of sample aspect ratio
+*/
+typedef enum {
+  ASP_UNSPECIFIED = 0,
+  ASP_1x1 = 1,
+  ASP_12x11 = 2,
+  ASP_10x11 = 3,
+  ASP_16x11 = 4,
+  ASP_40x33 = 5,
+  ASP_24x11 = 6,
+  ASP_20x11 = 7,
+  ASP_32x11 = 8,
+  ASP_80x33 = 9,
+  ASP_18x11 = 10,
+  ASP_15x11 = 11,
+  ASP_64x33 = 12,
+  ASP_160x99 = 13,
+
+  ASP_EXT_SAR = 255
+} ESampleAspectRatio;
+
+
+/**
+* @brief  Structure for spatial layer configuration
+*/
+typedef struct {
+  int   iVideoWidth;           ///< width of picture in luminance samples of a layer
+  int   iVideoHeight;          ///< height of picture in luminance samples of a layer
+  float fFrameRate;            ///< frame rate specified for a layer
+  int   iSpatialBitrate;       ///< target bitrate for a spatial layer, in unit of bps
+  int   iMaxSpatialBitrate;    ///< maximum  bitrate for a spatial layer, in unit of bps
+  EProfileIdc  uiProfileIdc;   ///< value of profile IDC (PRO_UNKNOWN for auto-detection)
+  ELevelIdc    uiLevelIdc;     ///< value of level IDC (0 for auto-detection)
+  int          iDLayerQp;      ///< QP specified for this layer; exact semantics not documented here -- TODO confirm
+
+  SSliceArgument sSliceArgument;
+
+  // Note: members bVideoSignalTypePresent through uiColorMatrix below are also defined in SWelsSPS in parameter_sets.h.
+  bool      bVideoSignalTypePresent;  // false => do not write any of the following information to the header
+  unsigned char
+  uiVideoFormat;        // EVideoFormatSPS; 3 bits in header; 0-5 => component, kpal, ntsc, secam, mac, undef
+  bool      bFullRange;         // false => analog video data range [16, 235]; true => full data range [0,255]
+  bool      bColorDescriptionPresent; // false => do not write any of the following three items to the header
+  unsigned char
+  uiColorPrimaries;     // EColorPrimaries; 8 bits in header; 0 - 9 => ???, bt709, undef, ???, bt470m, bt470bg,
+  //    smpte170m, smpte240m, film, bt2020
+  unsigned char
+  uiTransferCharacteristics;  // ETransferCharacteristics; 8 bits in header; 0 - 15 => ???, bt709, undef, ???, bt470m, bt470bg, smpte170m,
+  //   smpte240m, linear, log100, log316, iec61966-2-4, bt1361e, iec61966-2-1, bt2020-10, bt2020-12
+  unsigned char
+  uiColorMatrix;        // EColorMatrix; 8 bits in header (corresponds to FFmpeg "colorspace"); 0 - 10 => GBR, bt709,
+  //   undef, ???, fcc, bt470bg, smpte170m, smpte240m, YCgCo, bt2020nc, bt2020c
+
+  bool bAspectRatioPresent; ///< aspect ratio present in VUI
+  ESampleAspectRatio eAspectRatio; ///< aspect ratio idc
+  unsigned short sAspectRatioExtWidth; ///< use if aspect ratio idc == 255
+  unsigned short sAspectRatioExtHeight; ///< use if aspect ratio idc == 255
+
+} SSpatialLayerConfig;
+
+/**
+* @brief Encoder usage type
+*/
+typedef enum {
+  CAMERA_VIDEO_REAL_TIME,      ///< camera video for real-time communication
+  SCREEN_CONTENT_REAL_TIME,    ///< screen content signal
+  CAMERA_VIDEO_NON_REAL_TIME,
+  SCREEN_CONTENT_NON_REAL_TIME,
+  INPUT_CONTENT_TYPE_ALL,
+} EUsageType;
+
+/**
+* @brief Enumerate the complexity modes
+*/
+typedef enum {
+  LOW_COMPLEXITY = 0,              ///< the lowest complexity, the fastest speed
+  MEDIUM_COMPLEXITY,          ///< medium complexity, medium speed,medium quality
+  HIGH_COMPLEXITY             ///< high complexity, lowest speed, high quality
+} ECOMPLEXITY_MODE;
+
+/**
+ * @brief Enumerate the SPS/PPS ID strategies
+ */
+typedef enum {
+  CONSTANT_ID = 0,           ///< constant id in SPS/PPS
+  INCREASING_ID = 0x01,      ///< SPS/PPS id increases at each IDR
+  SPS_LISTING  = 0x02,       ///< using SPS in the existing list if possible
+  SPS_LISTING_AND_PPS_INCREASING  = 0x03,
+  SPS_PPS_LISTING  = 0x06,
+} EParameterSetStrategy;
+
+// TODO:  Refine the parameters definition.
+/**
+* @brief SVC Encoding Parameters
+*/
+typedef struct TagEncParamBase {
+  EUsageType
+  iUsageType;                 ///< application type; please refer to the definition of EUsageType
+
+  int       iPicWidth;        ///< width of picture in luminance samples (the maximum of all layers if multiple spatial layers are present)
+  int       iPicHeight;       ///< height of picture in luminance samples (the maximum of all layers if multiple spatial layers are present)
+  int       iTargetBitrate;   ///< target bitrate desired, in unit of bps
+  RC_MODES  iRCMode;          ///< rate control mode
+  float     fMaxFrameRate;    ///< maximal input frame rate
+
+} SEncParamBase, *PEncParamBase;
+
+/**
+* @brief SVC Encoding Parameters extension
+*/
+typedef struct TagEncParamExt {
+  EUsageType
+  iUsageType;                          ///< same as in TagEncParamBase
+
+  int       iPicWidth;                 ///< same as in TagEncParamBase
+  int       iPicHeight;                ///< same as in TagEncParamBase
+  int       iTargetBitrate;            ///< same as in TagEncParamBase
+  RC_MODES  iRCMode;                   ///< same as in TagEncParamBase
+  float     fMaxFrameRate;             ///< same as in TagEncParamBase
+
+  int       iTemporalLayerNum;         ///< temporal layer number, max temporal layer = 4
+  int       iSpatialLayerNum;          ///< spatial layer number, 1 <= iSpatialLayerNum <= MAX_SPATIAL_LAYER_NUM, MAX_SPATIAL_LAYER_NUM = 4
+  SSpatialLayerConfig sSpatialLayers[MAX_SPATIAL_LAYER_NUM];
+
+  ECOMPLEXITY_MODE iComplexityMode;
+  unsigned int      uiIntraPeriod;     ///< period of Intra frame
+  int               iNumRefFrame;      ///< number of reference frames used
+  EParameterSetStrategy
+  eSpsPpsIdStrategy;       ///< strategy for adjusting the ID in SPS/PPS (see EParameterSetStrategy): 0- constant ID, 1-additional ID, 6-mapping and additional
+  bool    bPrefixNalAddingCtrl;        ///< false: do not use Prefix NAL; true: use Prefix NAL
+  bool    bEnableSSEI;                 ///< false: do not use SSEI; true: use SSEI -- TODO: planning to remove the interface of SSEI
+  bool    bSimulcastAVC;               ///< (when encoding more than 1 spatial layer) false: use SVC syntax for higher layers; true: use Simulcast AVC
+  int     iPaddingFlag;                ///< 0:disable padding;1:padding
+  int     iEntropyCodingModeFlag;      ///< 0:CAVLC  1:CABAC.
+
+  /* rc control */
+  bool    bEnableFrameSkip;            ///< False: don't skip frame even if VBV buffer overflow.True: allow skipping frames to keep the bitrate within limits
+  int     iMaxBitrate;                 ///< the maximum bitrate, in unit of bps, set it to UNSPECIFIED_BIT_RATE if not needed
+  int     iMaxQp;                      ///< the maximum QP encoder supports
+  int     iMinQp;                      ///< the minimum QP encoder supports
+  unsigned int uiMaxNalSize;           ///< the maximum NAL size.  This value should not be 0 for dynamic slice mode
+
+  /*LTR settings*/
+  bool     bEnableLongTermReference;   ///< 1: on, 0: off
+  int      iLTRRefNum;                 ///< the number of LTR(long term reference),TODO: setting it to an arbitrary value is not supported yet
+  unsigned int      iLtrMarkPeriod;    ///< the LTR marked period that is used in feedback.
+  /* multi-thread settings*/
+  unsigned short
+  iMultipleThreadIdc;                  ///< 1 # 0: auto(dynamic imp. internal encoder); 1: multiple threads imp. disabled; larger than 1: count number of threads;
+  bool  bUseLoadBalancing; ///< only used when uiSliceMode=1 or 3, will change slicing of a picture during the run-time of multi-thread encoding, so the result of each run may be different
+
+  /* Deblocking loop filter */
+  int       iLoopFilterDisableIdc;     ///< 0: on, 1: off, 2: on except for slice boundaries
+  int       iLoopFilterAlphaC0Offset;  ///< AlphaOffset: valid range [-6, 6], default 0
+  int       iLoopFilterBetaOffset;     ///< BetaOffset: valid range [-6, 6], default 0
+  /*pre-processing feature*/
+  bool    bEnableDenoise;              ///< denoise control
+  bool    bEnableBackgroundDetection;  ///< background detection control //VAA_BACKGROUND_DETECTION //BGD cmd
+  bool    bEnableAdaptiveQuant;        ///< adaptive quantization control
+  bool    bEnableFrameCroppingFlag;    ///< enable frame cropping flag: TRUE always in application
+  bool    bEnableSceneChangeDetect;
+
+  bool    bIsLosslessLink;            ///<  LTR advanced setting
+} SEncParamExt;
+
+/**
+* @brief Define a new struct to show the property of video bitstream.
+*/
+typedef struct {
+  unsigned int          size;          ///< size of the struct
+  VIDEO_BITSTREAM_TYPE  eVideoBsType;  ///< video stream type (AVC/SVC)
+} SVideoProperty;
+
+/**
+* @brief SVC Decoding Parameters, reserved here and potentially applicable in the future
+*/
+typedef struct TagSVCDecodingParam {
+  char*     pFileNameRestructed;       ///< file name of reconstructed frame used for PSNR calculation based debug
+
+  unsigned int  uiCpuLoad;             ///< CPU load
+  unsigned char uiTargetDqLayer;       ///< setting target dq layer id
+
+  ERROR_CON_IDC eEcActiveIdc;          ///< whether the error concealment feature is active in the decoder
+  bool bParseOnly;                     ///< decoder for parse only, no reconstruction. When it is true, SPS/PPS size should not exceed SPS_PPS_BS_SIZE (128). Otherwise, it will return error info
+
+  SVideoProperty   sVideoProperty;    ///< video stream property
+} SDecodingParam, *PDecodingParam;
+
+/**
+* @brief Bitstream information of a layer being encoded
+*/
+typedef struct {
+  unsigned char uiTemporalId;
+  unsigned char uiSpatialId;
+  unsigned char uiQualityId;
+  EVideoFrameType eFrameType;
+  unsigned char uiLayerType;
+
+  /**
+   * The sub sequence layers are ordered hierarchically based on their dependency on each other so that any picture in a layer shall not be
+   * predicted from any picture on any higher layer.
+  */
+  int   iSubSeqId;                ///< refer to D.2.11 Sub-sequence information SEI message semantics
+  int   iNalCount;              ///< count number of NAL coded already
+  int*  pNalLengthInByte;       ///< length in bytes of each NAL, indexed from 0 to iNalCount-1
+  unsigned char*  pBsBuf;       ///< buffer of bitstream contained
+} SLayerBSInfo, *PLayerBSInfo;
+
+/**
+* @brief Frame bit stream info
+*/
+typedef struct {
+  int           iLayerNum;
+  SLayerBSInfo  sLayerInfo[MAX_LAYER_NUM_OF_FRAME];
+
+  EVideoFrameType eFrameType;
+  int iFrameSizeInBytes;
+  long long uiTimeStamp;
+} SFrameBSInfo, *PFrameBSInfo;
+
+/**
+*  @brief Structure for source picture
+*/
+typedef struct Source_Picture_s {
+  int       iColorFormat;          ///< color space type
+  int       iStride[4];            ///< stride for each plane pData
+  unsigned char*  pData[4];        ///< plane pData
+  int       iPicWidth;             ///< luma picture width in x coordinate
+  int       iPicHeight;            ///< luma picture height in y coordinate
+  long long uiTimeStamp;           ///< timestamp of the source picture, unit: millisecond
+} SSourcePicture;
+/**
+* @brief Structure for bit rate info
+*/
+typedef struct TagBitrateInfo {
+  LAYER_NUM iLayer;
+  int iBitrate;                    ///< the maximum bitrate
+} SBitrateInfo;
+
+/**
+* @brief Structure for dump layer info
+*/
+typedef struct TagDumpLayer {
+  int iLayer;
+  char* pFileName;
+} SDumpLayer;
+
+/**
+* @brief Structure for profile info in layer
+*
+*/
+typedef struct TagProfileInfo {
+  int iLayer;
+  EProfileIdc uiProfileIdc;        ///< the profile info
+} SProfileInfo;
+
+/**
+* @brief  Structure for level info in layer
+*
+*/
+typedef struct TagLevelInfo {
+  int iLayer;
+  ELevelIdc uiLevelIdc;            ///< the level info
+} SLevelInfo;
+/**
+* @brief Structure for delivery status
+*
+*/
+typedef struct TagDeliveryStatus {
+  bool bDeliveryFlag;              ///< 0: the previous frame isn't delivered,1: the previous frame is delivered
+  int iDropFrameType;              ///< the frame type that is dropped; reserved
+  int iDropFrameSize;              ///< the frame size that is dropped; reserved
+} SDeliveryStatus;
+
+/**
+* @brief The capability of decoder, for SDP negotiation
+*/
+typedef struct TagDecoderCapability {
+  int iProfileIdc;     ///< profile_idc
+  int iProfileIop;     ///< profile-iop
+  int iLevelIdc;       ///< level_idc
+  int iMaxMbps;        ///< max-mbps
+  int iMaxFs;          ///< max-fs
+  int iMaxCpb;         ///< max-cpb
+  int iMaxDpb;         ///< max-dpb
+  int iMaxBr;          ///< max-br
+  bool bRedPicCap;     ///< redundant-pic-cap
+} SDecoderCapability;
+
+/**
+* @brief Structure for parse only output
+*/
+typedef struct TagParserBsInfo {
+  int iNalNum;                                 ///< total NAL number in current AU
+  int* pNalLenInByte;  ///< each nal length
+  unsigned char* pDstBuff;                     ///< outputted dst buffer for parsed bitstream
+  int iSpsWidthInPixel;                        ///< required SPS width info
+  int iSpsHeightInPixel;                       ///< required SPS height info
+  unsigned long long uiInBsTimeStamp;               ///< input BS timestamp
+  unsigned long long uiOutBsTimeStamp;             ///< output BS timestamp
+} SParserBsInfo, *PParserBsInfo;
+
+/**
+* @brief Structure for encoder statistics
+*/
+typedef struct TagVideoEncoderStatistics {
+  unsigned int uiWidth;                        ///< the width of encoded frame
+  unsigned int uiHeight;                       ///< the height of encoded frame
+  //following standard, will be 16x aligned, if there are multiple spatial, this is of the highest
+  float fAverageFrameSpeedInMs;                ///< average_Encoding_Time
+
+  // rate control related
+  float fAverageFrameRate;                     ///< the average input frame rate, calculated since encoding starts, supposing that the input timestamp is in unit of ms
+  float fLatestFrameRate;                      ///< the input frame rate in the last second, supposing that the input timestamp is in unit of ms (? useful for checking BR, but is it easy to calculate?)
+  unsigned int uiBitRate;                      ///< sendrate in Bits per second, calculated within the set time-window
+  unsigned int uiAverageFrameQP;                    ///< the average QP of last encoded frame
+
+  unsigned int uiInputFrameCount;              ///< number of frames -- NOTE(review): presumably input frames, per the field name
+  unsigned int uiSkippedFrameCount;            ///< number of frames -- NOTE(review): presumably skipped frames, per the field name
+
+  unsigned int uiResolutionChangeTimes;        ///< uiResolutionChangeTimes
+  unsigned int uiIDRReqNum;                    ///< number of IDR requests
+  unsigned int uiIDRSentNum;                   ///< number of actual IDRs sent
+  unsigned int uiLTRSentNum;                   ///< number of LTR sent/marked
+
+  long long    iStatisticsTs;                  ///< Timestamp of updating the statistics
+
+  unsigned long iTotalEncodedBytes;
+  unsigned long iLastStatisticsBytes;
+  unsigned long iLastStatisticsFrameCount;
+} SEncoderStatistics;
+
+/**
+* @brief  Structure for decoder statistics
+*/
+typedef struct TagVideoDecoderStatistics {
+  unsigned int uiWidth;                        ///< the width of encode/decode frame
+  unsigned int uiHeight;                       ///< the height of encode/decode frame
+  float fAverageFrameSpeedInMs;                ///< average_Decoding_Time
+  float fActualAverageFrameSpeedInMs;          ///< actual average_Decoding_Time, including freezing pictures
+  unsigned int uiDecodedFrameCount;            ///< number of frames
+  unsigned int uiResolutionChangeTimes;        ///< uiResolutionChangeTimes
+  unsigned int uiIDRCorrectNum;                ///< number of correct IDR received
+  //EC (error concealment) related
+  unsigned int
+  uiAvgEcRatio;                                ///< when EC is on, the average ratio of total EC areas, can be an indicator of reconstruction quality
+  unsigned int
+  uiAvgEcPropRatio;                            ///< when EC is on, the rough average ratio of propagated EC areas, can be an indicator of reconstruction quality
+  unsigned int uiEcIDRNum;                     ///< number of IDRs that were incomplete, or not received, but error-concealed
+  unsigned int uiEcFrameNum;                   ///< NOTE(review): presumably the number of error-concealed frames -- confirm
+  unsigned int uiIDRLostNum;                   ///< number of whole lost IDR
+  unsigned int
+  uiFreezingIDRNum;               ///< number of freezing IDR with error (partly received), under resolution change
+  unsigned int uiFreezingNonIDRNum;            ///< number of freezing non-IDR with error
+  int iAvgLumaQp;                              ///< average luma QP. default: -1, no correct frame outputted
+  int iSpsReportErrorNum;                      ///< number of Sps Invalid report
+  int iSubSpsReportErrorNum;                   ///< number of SubSps Invalid report
+  int iPpsReportErrorNum;                      ///< number of Pps Invalid report
+  int iSpsNoExistNalNum;                       ///< number of Sps NoExist Nal
+  int iSubSpsNoExistNalNum;                    ///< number of SubSps NoExist Nal
+  int iPpsNoExistNalNum;                       ///< number of Pps NoExist Nal
+
+  unsigned int uiProfile;                ///< Profile idc in syntax
+  unsigned int uiLevel;                  ///< level idc according to Annex A-1
+
+  int iCurrentActiveSpsId;                     ///< current active SPS id
+  int iCurrentActivePpsId;                     ///< current active PPS id
+
+  unsigned int iStatisticsLogInterval;                  ///< frame interval of statistics log
+} SDecoderStatistics; // in building, coming soon
+
+/**
+* @brief Structure for sample aspect ratio (SAR) info in VUI
+*/
+typedef struct TagVuiSarInfo {
+  unsigned int uiSarWidth;                     ///< SAR width
+  unsigned int uiSarHeight;                    ///< SAR height
+  bool bOverscanAppropriateFlag;               ///< SAR overscan flag
+} SVuiSarInfo, *PVuiSarInfo;
+
+#endif//WELS_VIDEO_CODEC_APPLICATION_DEFINITION_H__
--- a/codec/common/inc/macros.h
+++ b/codec/common/inc/macros.h
@@ -94,6 +94,9 @@
 #ifndef WELS_MIN
 #define WELS_MIN(x, y) ((x) < (y) ? (x) : (y))
 #endif//WELS_MIN
+#ifndef WELS_MIN_POSITIVE // yields the smaller of x and y when both are >= 0, otherwise the larger of the two
+#define WELS_MIN_POSITIVE(x, y) (((x) >= 0 && (y) >= 0) ? WELS_MIN(x, y) : WELS_MAX(x, y)) // fully parenthesized, no trailing ';': safe inside larger expressions; arguments are evaluated more than once
+#endif//WELS_MIN_POSITIVE
 #else // Alternative implementation of WELS_MAX and WELS_MIN
 #ifndef WELS_MAX
 #define WELS_MAX(x, y) ((x) - (((x)-(y))&(((x)-(y))>>31)))
--- a/codec/common/inc/wels_common_defs.h
+++ b/codec/common/inc/wels_common_defs.h
@@ -1,356 +1,373 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-//wels_common_defs.h
-#ifndef WELS_COMMON_DEFS_H__
-#define WELS_COMMON_DEFS_H__
-
-#include "typedefs.h"
-#include "macros.h"
-#include "codec_app_def.h"
-
-
-namespace WelsCommon {
-/*common use table*/
-
-#define  CTX_NA 0
-#define  WELS_CONTEXT_COUNT 460
-#define LEVEL_NUMBER 17
-typedef struct TagLevelLimits {
-  ELevelIdc uiLevelIdc;  // level idc
-  uint32_t uiMaxMBPS; // Max macroblock processing rate(MB/s)
-  uint32_t uiMaxFS;   // Max frame sizea(MBs)
-  uint32_t uiMaxDPBMbs;// Max decoded picture buffer size(MBs)
-  uint32_t uiMaxBR; // Max video bit rate
-  uint32_t uiMaxCPB; // Max CPB size
-  int16_t iMinVmv; // Vertical MV component range upper bound
-  int16_t iMaxVmv; // Vertical MV component range lower bound
-  uint16_t uiMinCR;  // Min compression ration
-  int16_t iMaxMvsPer2Mb; // Max number of motion vectors per two consecutive MBs
-} SLevelLimits;
-
-#define CpbBrNalFactor 1200  //baseline,main,and extended profiles.
-extern const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER];
-extern const uint32_t g_kuiLevelMaps[LEVEL_NUMBER];
-extern const uint8_t g_kuiMbCountScan4Idx[24];
-extern const uint8_t g_kuiCache30ScanIdx[16];
-extern const uint8_t g_kuiCache48CountScan4Idx[24];
-
-extern const uint8_t g_kuiMatrixV[6][8][8];
-
-extern const uint8_t g_kuiDequantScaling4x4Default[2][16];
-extern const uint8_t g_kuiDequantScaling8x8Default[2][64];
-extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
-extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16);
-extern const uint8_t g_kuiChromaQpTable[52];
-
-extern const uint8_t g_kuiCabacRangeLps[64][4];
-extern const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2];
-extern const uint8_t g_kuiStateTransTable[64][2];
-extern const uint32_t g_kuiGolombUELength[256];
-/*
- *  NAL Unit Type (5 Bits)
- */
-enum EWelsNalUnitType {
-  NAL_UNIT_UNSPEC_0             = 0,
-  NAL_UNIT_CODED_SLICE          = 1,
-  NAL_UNIT_CODED_SLICE_DPA      = 2,
-  NAL_UNIT_CODED_SLICE_DPB      = 3,
-  NAL_UNIT_CODED_SLICE_DPC      = 4,
-  NAL_UNIT_CODED_SLICE_IDR      = 5,
-  NAL_UNIT_SEI                  = 6,
-  NAL_UNIT_SPS                  = 7,
-  NAL_UNIT_PPS                  = 8,
-  NAL_UNIT_AU_DELIMITER         = 9,
-  NAL_UNIT_END_OF_SEQ           = 10,
-  NAL_UNIT_END_OF_STR           = 11,
-  NAL_UNIT_FILLER_DATA          = 12,
-  NAL_UNIT_SPS_EXT              = 13,
-  NAL_UNIT_PREFIX               = 14,
-  NAL_UNIT_SUBSET_SPS           = 15,
-  NAL_UNIT_DEPTH_PARAM          = 16, // NAL_UNIT_RESV_16
-  NAL_UNIT_RESV_17              = 17,
-  NAL_UNIT_RESV_18              = 18,
-  NAL_UNIT_AUX_CODED_SLICE      = 19,
-  NAL_UNIT_CODED_SLICE_EXT      = 20,
-  NAL_UNIT_MVC_SLICE_EXT        = 21, // NAL_UNIT_RESV_21
-  NAL_UNIT_RESV_22              = 22,
-  NAL_UNIT_RESV_23              = 23,
-  NAL_UNIT_UNSPEC_24            = 24,
-  NAL_UNIT_UNSPEC_25            = 25,
-  NAL_UNIT_UNSPEC_26            = 26,
-  NAL_UNIT_UNSPEC_27            = 27,
-  NAL_UNIT_UNSPEC_28            = 28,
-  NAL_UNIT_UNSPEC_29            = 29,
-  NAL_UNIT_UNSPEC_30            = 30,
-  NAL_UNIT_UNSPEC_31            = 31
-};
-
-/*
- *  NAL Reference IDC (2 Bits)
- */
-
-enum EWelsNalRefIdc {
-  NRI_PRI_LOWEST        = 0,
-  NRI_PRI_LOW           = 1,
-  NRI_PRI_HIGH          = 2,
-  NRI_PRI_HIGHEST       = 3
-};
-
-/*
- * VCL TYPE
- */
-
-enum EVclType {
-  NON_VCL   = 0,
-  VCL       = 1,
-  NOT_APP   = 2
-};
-
-/*
- *  vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC).
- */
-extern const EVclType g_keTypeMap[32][2];
-
-#define IS_VCL_NAL(t, ext_idx)                  (g_keTypeMap[t][ext_idx] == VCL)
-#define IS_PARAM_SETS_NALS(t)                   ( (t) == NAL_UNIT_SPS || (t) == NAL_UNIT_PPS || (t) == NAL_UNIT_SUBSET_SPS )
-#define IS_SPS_NAL(t)                           ( (t) == NAL_UNIT_SPS )
-#define IS_SUBSET_SPS_NAL(t)                    ( (t) == NAL_UNIT_SUBSET_SPS )
-#define IS_PPS_NAL(t)                           ( (t) == NAL_UNIT_PPS )
-#define IS_SEI_NAL(t)                           ( (t) == NAL_UNIT_SEI )
-#define IS_AU_DELIMITER_NAL(t)                  ( (t) == NAL_UNIT_AU_DELIMITER )
-#define IS_PREFIX_NAL(t)                        ( (t) == NAL_UNIT_PREFIX )
-#define IS_SUBSET_SPS_USED(t)                   ( (t) == NAL_UNIT_SUBSET_SPS || (t) == NAL_UNIT_CODED_SLICE_EXT )
-#define IS_VCL_NAL_AVC_BASE(t)                  ( (t) == NAL_UNIT_CODED_SLICE || (t) == NAL_UNIT_CODED_SLICE_IDR )
-#define IS_NEW_INTRODUCED_SVC_NAL(t)            ( (t) == NAL_UNIT_PREFIX || (t) == NAL_UNIT_CODED_SLICE_EXT )
-
-
-/* Base SSlice Types
- * Invalid in case of eSliceType exceeds 9,
- * Need trim when eSliceType > 4 as fixed SliceType(eSliceType-4),
- * meaning mapped version after eSliceType minus 4.
- */
-
-enum EWelsSliceType {
-  P_SLICE       = 0,
-  B_SLICE       = 1,
-  I_SLICE       = 2,
-  SP_SLICE      = 3,
-  SI_SLICE      = 4,
-  UNKNOWN_SLICE = 5
-};
-
-/* SSlice Types in scalable extension */
-enum ESliceTypeExt {
-  EP_SLICE = 0, // EP_SLICE: 0, 5
-  EB_SLICE = 1, // EB_SLICE: 1, 6
-  EI_SLICE = 2  // EI_SLICE: 2, 7
-};
-
-/* List Index */
-enum EListIndex {
-  LIST_0    = 0,
-  LIST_1    = 1,
-  LIST_A    = 2
-};
-
-
-
-/* Motion Vector components */
-enum EMvComp {
-  MV_X  = 0,
-  MV_Y  = 1,
-  MV_A  = 2
-};
-
-/* Chroma Components */
-
-enum EChromaComp {
-  CHROMA_CB     = 0,
-  CHROMA_CR     = 1,
-  CHROMA_A      = 2
-};
-
-
-
-/*
- *  Memory Management Control Operation (MMCO) code
- */
-enum EMmcoCode {
-  MMCO_END          = 0,
-  MMCO_SHORT2UNUSED = 1,
-  MMCO_LONG2UNUSED  = 2,
-  MMCO_SHORT2LONG   = 3,
-  MMCO_SET_MAX_LONG = 4,
-  MMCO_RESET        = 5,
-  MMCO_LONG         = 6
-};
-
-enum EVuiVideoFormat {
-   VUI_COMPONENT   = 0,
-   VUI_PAL         = 1,
-   VUI_NTSC        = 2,
-   VUI_SECAM       = 3,
-   VUI_MAC         = 4,
-   VUI_UNSPECIFIED = 5,
-   VUI_RESERVED1   = 6,
-   VUI_RESERVED2   = 7
-};
-
-/*
- *  Bit-stream auxiliary reading / writing
- */
-typedef struct TagBitStringAux {
-  uint8_t* pStartBuf;   // buffer to start position
-  uint8_t* pEndBuf;     // buffer + length
-  int32_t  iBits;       // count bits of overall bitstreaming input
-
-  intX_t   iIndex;      //only for cavlc usage
-  uint8_t* pCurBuf;     // current reading position
-  uint32_t uiCurBits;
-  int32_t  iLeftBits;   // count number of available bits left ([1, 8]),
-  // need pointer to next byte start position in case 0 bit left then 8 instead
-} SBitStringAux, *PBitStringAux;
-
-/* NAL Unix Header in AVC, refer to Page 56 in JVT X201wcm */
-typedef struct TagNalUnitHeader {
-  uint8_t             uiForbiddenZeroBit;
-  uint8_t             uiNalRefIdc;
-  EWelsNalUnitType    eNalUnitType;
-  uint8_t             uiReservedOneByte;                // only padding usage
-} SNalUnitHeader, *PNalUnitHeader;
-
-/* NAL Unit Header in scalable extension syntax, refer to Page 390 in JVT X201wcm */
-typedef struct TagNalUnitHeaderExt {
-  SNalUnitHeader      sNalUnitHeader;
-
-  // uint8_t   reserved_one_bit;
-  bool      bIdrFlag;
-  uint8_t   uiPriorityId;
-  int8_t    iNoInterLayerPredFlag;      // change as int8_t to support 3 values probably in encoder
-  uint8_t   uiDependencyId;
-
-  uint8_t   uiQualityId;
-  uint8_t   uiTemporalId;
-  bool      bUseRefBasePicFlag;
-  bool      bDiscardableFlag;
-
-  bool      bOutputFlag;
-  uint8_t   uiReservedThree2Bits;
-  // Derived variable(s)
-  uint8_t   uiLayerDqId;
-  bool      bNalExtFlag;
-} SNalUnitHeaderExt, *PNalUnitHeaderExt;
-
-/* AVC MB types*/
-#define MB_TYPE_INTRA4x4    0x00000001
-#define MB_TYPE_INTRA16x16  0x00000002
-#define MB_TYPE_INTRA8x8    0x00000004
-#define MB_TYPE_16x16       0x00000008
-#define MB_TYPE_16x8        0x00000010
-#define MB_TYPE_8x16        0x00000020
-#define MB_TYPE_8x8         0x00000040
-#define MB_TYPE_8x8_REF0    0x00000080
-#define MB_TYPE_SKIP        0x00000100
-#define MB_TYPE_INTRA_PCM   0x00000200
-#define MB_TYPE_INTRA_BL    0x00000400
-
-#define MB_TYPE_DIRECT2     0x00004000
-
-#define SUB_MB_TYPE_8x8     0x00000001
-#define SUB_MB_TYPE_8x4     0x00000002
-#define SUB_MB_TYPE_4x8     0x00000004
-#define SUB_MB_TYPE_4x4     0x00000008
-
-#define MB_TYPE_INTRA     (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM)
-#define MB_TYPE_INTER     (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP)
-#define IS_INTRA4x4(type) ( MB_TYPE_INTRA4x4 == (type) )
-#define IS_INTRA8x8(type) ( MB_TYPE_INTRA8x8 == (type) )
-#define IS_INTRANxN(type) ( MB_TYPE_INTRA4x4 == (type) || MB_TYPE_INTRA8x8 == (type) )
-#define IS_INTRA16x16(type) ( MB_TYPE_INTRA16x16 == (type) )
-#define IS_INTRA(type) ( (type)&MB_TYPE_INTRA )
-#define IS_INTER(type) ( (type)&MB_TYPE_INTER )
-
-#define IS_SKIP(type) ( (type) == MB_TYPE_SKIP )
-#define IS_SVC_INTER(type) IS_INTER(type)
-#define IS_I_BL(type) ( (type) == MB_TYPE_INTRA_BL )
-#define IS_SVC_INTRA(type) ( IS_I_BL(type) || IS_INTRA(type) )
-#define IS_Inter_8x8(type) ( (type) == MB_TYPE_8x8)
-
-#define REF_NOT_AVAIL   -2
-#define REF_NOT_IN_LIST -1  //intra
-
-/////////intra16x16  Luma
-#define I16_PRED_INVALID   -1
-#define I16_PRED_V       0
-#define I16_PRED_H       1
-#define I16_PRED_DC      2
-#define I16_PRED_P       3
-
-#define I16_PRED_DC_L    4
-#define I16_PRED_DC_T    5
-#define I16_PRED_DC_128  6
-#define I16_PRED_DC_A  7
-//////////intra4x4   Luma
-// Here, I8x8 also use these definitions
-#define I4_PRED_INVALID    0
-#define I4_PRED_V        0
-#define I4_PRED_H        1
-#define I4_PRED_DC       2
-#define I4_PRED_DDL      3 //diagonal_down_left
-#define I4_PRED_DDR      4 //diagonal_down_right
-#define I4_PRED_VR       5 //vertical_right
-#define I4_PRED_HD       6 //horizon_down
-#define I4_PRED_VL       7 //vertical_left
-#define I4_PRED_HU       8 //horizon_up
-
-#define I4_PRED_DC_L     9
-#define I4_PRED_DC_T     10
-#define I4_PRED_DC_128   11
-
-#define I4_PRED_DDL_TOP  12 //right-top replacing by padding rightmost pixel of top
-#define I4_PRED_VL_TOP   13 //right-top replacing by padding rightmost pixel of top
-#define I4_PRED_A   14
-
-//////////intra Chroma
-#define C_PRED_INVALID   -1
-#define C_PRED_DC        0
-#define C_PRED_H         1
-#define C_PRED_V         2
-#define C_PRED_P         3
-
-#define C_PRED_DC_L      4
-#define C_PRED_DC_T      5
-#define C_PRED_DC_128    6
-#define C_PRED_A    7
-}
-#endif//WELS_COMMON_DEFS_H__
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+//wels_common_defs.h
+#ifndef WELS_COMMON_DEFS_H__
+#define WELS_COMMON_DEFS_H__
+
+#include "typedefs.h"
+#include "macros.h"
+#include "codec_app_def.h"
+
+
+namespace WelsCommon {
+/*common use table*/
+
+#define  CTX_NA 0
+#define  WELS_CONTEXT_COUNT 460
+#define LEVEL_NUMBER 17
+typedef struct TagLevelLimits {
+  ELevelIdc uiLevelIdc;  // level idc
+  uint32_t uiMaxMBPS; // Max macroblock processing rate(MB/s)
+  uint32_t uiMaxFS;   // Max frame size(MBs)
+  uint32_t uiMaxDPBMbs;// Max decoded picture buffer size(MBs)
+  uint32_t uiMaxBR; // Max video bit rate
+  uint32_t uiMaxCPB; // Max CPB size
+  int16_t iMinVmv; // Vertical MV component range lower bound
+  int16_t iMaxVmv; // Vertical MV component range upper bound
+  uint16_t uiMinCR;  // Min compression ratio
+  int16_t iMaxMvsPer2Mb; // Max number of motion vectors per two consecutive MBs
+} SLevelLimits;
+
+#define CpbBrNalFactor 1200  //baseline,main,and extended profiles.
+extern const SLevelLimits g_ksLevelLimits[LEVEL_NUMBER];
+extern const uint32_t g_kuiLevelMaps[LEVEL_NUMBER];
+extern const uint8_t g_kuiMbCountScan4Idx[24];
+extern const uint8_t g_kuiCache30ScanIdx[16];
+extern const uint8_t g_kuiCache48CountScan4Idx[24];
+
+extern const uint8_t g_kuiMatrixV[6][8][8];
+
+extern const uint8_t g_kuiDequantScaling4x4Default[2][16];
+extern const uint8_t g_kuiDequantScaling8x8Default[2][64];
+extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff[52][8], 16);
+extern const ALIGNED_DECLARE (uint16_t, g_kuiDequantCoeff8x8[52][64], 16);
+extern const uint8_t g_kuiChromaQpTable[52];
+
+extern const uint8_t g_kuiCabacRangeLps[64][4];
+extern const int8_t g_kiCabacGlobalContextIdx[WELS_CONTEXT_COUNT][4][2];
+extern const uint8_t g_kuiStateTransTable[64][2];
+extern const uint32_t g_kuiGolombUELength[256];
+/*
+ *  NAL Unit Type (5 Bits)
+ */
+enum EWelsNalUnitType {
+  NAL_UNIT_UNSPEC_0             = 0,
+  NAL_UNIT_CODED_SLICE          = 1,
+  NAL_UNIT_CODED_SLICE_DPA      = 2,
+  NAL_UNIT_CODED_SLICE_DPB      = 3,
+  NAL_UNIT_CODED_SLICE_DPC      = 4,
+  NAL_UNIT_CODED_SLICE_IDR      = 5,
+  NAL_UNIT_SEI                  = 6,
+  NAL_UNIT_SPS                  = 7,
+  NAL_UNIT_PPS                  = 8,
+  NAL_UNIT_AU_DELIMITER         = 9,
+  NAL_UNIT_END_OF_SEQ           = 10,
+  NAL_UNIT_END_OF_STR           = 11,
+  NAL_UNIT_FILLER_DATA          = 12,
+  NAL_UNIT_SPS_EXT              = 13,
+  NAL_UNIT_PREFIX               = 14,
+  NAL_UNIT_SUBSET_SPS           = 15,
+  NAL_UNIT_DEPTH_PARAM          = 16, // NAL_UNIT_RESV_16
+  NAL_UNIT_RESV_17              = 17,
+  NAL_UNIT_RESV_18              = 18,
+  NAL_UNIT_AUX_CODED_SLICE      = 19,
+  NAL_UNIT_CODED_SLICE_EXT      = 20,
+  NAL_UNIT_MVC_SLICE_EXT        = 21, // NAL_UNIT_RESV_21
+  NAL_UNIT_RESV_22              = 22,
+  NAL_UNIT_RESV_23              = 23,
+  NAL_UNIT_UNSPEC_24            = 24,
+  NAL_UNIT_UNSPEC_25            = 25,
+  NAL_UNIT_UNSPEC_26            = 26,
+  NAL_UNIT_UNSPEC_27            = 27,
+  NAL_UNIT_UNSPEC_28            = 28,
+  NAL_UNIT_UNSPEC_29            = 29,
+  NAL_UNIT_UNSPEC_30            = 30,
+  NAL_UNIT_UNSPEC_31            = 31
+};
+
+/*
+ *  NAL Reference IDC (2 Bits)
+ */
+
+enum EWelsNalRefIdc {
+  NRI_PRI_LOWEST        = 0,
+  NRI_PRI_LOW           = 1,
+  NRI_PRI_HIGH          = 2,
+  NRI_PRI_HIGHEST       = 3
+};
+
+/*
+ * VCL TYPE
+ */
+
+enum EVclType {
+  NON_VCL   = 0,
+  VCL       = 1,
+  NOT_APP   = 2
+};
+
+/*
+ *  vcl type map for given NAL unit type and corresponding H264 type (0: AVC; 1: SVC).
+ */
+extern const EVclType g_keTypeMap[32][2];
+
+#define IS_VCL_NAL(t, ext_idx)                  (g_keTypeMap[t][ext_idx] == VCL)
+#define IS_PARAM_SETS_NALS(t)                   ( (t) == NAL_UNIT_SPS || (t) == NAL_UNIT_PPS || (t) == NAL_UNIT_SUBSET_SPS )
+#define IS_SPS_NAL(t)                           ( (t) == NAL_UNIT_SPS )
+#define IS_SUBSET_SPS_NAL(t)                    ( (t) == NAL_UNIT_SUBSET_SPS )
+#define IS_PPS_NAL(t)                           ( (t) == NAL_UNIT_PPS )
+#define IS_SEI_NAL(t)                           ( (t) == NAL_UNIT_SEI )
+#define IS_AU_DELIMITER_NAL(t)                  ( (t) == NAL_UNIT_AU_DELIMITER )
+#define IS_PREFIX_NAL(t)                        ( (t) == NAL_UNIT_PREFIX )
+#define IS_SUBSET_SPS_USED(t)                   ( (t) == NAL_UNIT_SUBSET_SPS || (t) == NAL_UNIT_CODED_SLICE_EXT )
+#define IS_VCL_NAL_AVC_BASE(t)                  ( (t) == NAL_UNIT_CODED_SLICE || (t) == NAL_UNIT_CODED_SLICE_IDR )
+#define IS_NEW_INTRODUCED_SVC_NAL(t)            ( (t) == NAL_UNIT_PREFIX || (t) == NAL_UNIT_CODED_SLICE_EXT )
+
+
+/* Base slice types.
+ * eSliceType values greater than 9 are invalid.
+ * Values greater than 4 need trimming to the fixed slice type (eSliceType-4), i.e. the
+ * mapped version; NOTE(review): ESliceTypeExt below pairs 5 with 0 (offset 5) - confirm.
+ */
+
+enum EWelsSliceType {   // five base slice types (0..4) plus an out-of-range marker
+  P_SLICE       = 0,
+  B_SLICE       = 1,
+  I_SLICE       = 2,
+  SP_SLICE      = 3,
+  SI_SLICE      = 4,
+  UNKNOWN_SLICE = 5     // first value after the five base types
+};
+
+/* SSlice Types in scalable extension; the value pairs in the comments are presumably the raw slice type codes accepted for each entry - confirm */
+enum ESliceTypeExt {
+  EP_SLICE = 0, // EP_SLICE: 0, 5 (enum value equals P_SLICE)
+  EB_SLICE = 1, // EB_SLICE: 1, 6 (enum value equals B_SLICE)
+  EI_SLICE = 2  // EI_SLICE: 2, 7 (enum value equals I_SLICE)
+};
+
+/* List index: selects one of the two lists, LIST_0 or LIST_1 */
+enum EListIndex {
+  LIST_0    = 0,
+  LIST_1    = 1,
+  LIST_A    = 2    // NOTE(review): looks like the number of lists (array-size sentinel) - confirm
+};
+
+
+
+/* Motion vector components: index of the X and Y part of a vector */
+enum EMvComp {
+  MV_X  = 0,
+  MV_Y  = 1,
+  MV_A  = 2    // NOTE(review): looks like the component count (array-size sentinel) - confirm
+};
+
+/* Chroma Components */
+
+enum EChromaComp {   // index of a chroma plane
+  CHROMA_CB     = 0,
+  CHROMA_CR     = 1,
+  CHROMA_A      = 2    // NOTE(review): looks like the plane count (array-size sentinel) - confirm
+};
+
+
+
+/*
+ *  Memory Management Control Operation (MMCO) code
+ */
+enum EMmcoCode {   // consecutive MMCO codes 0..6
+  MMCO_END          = 0,   // presumably terminates the list of operations - confirm
+  MMCO_SHORT2UNUSED = 1,
+  MMCO_LONG2UNUSED  = 2,
+  MMCO_SHORT2LONG   = 3,
+  MMCO_SET_MAX_LONG = 4,
+  MMCO_RESET        = 5,
+  MMCO_LONG         = 6    // last defined code
+};
+
+enum EVuiVideoFormat {   // eight consecutive codes 0..7; presumably the VUI video_format field - confirm
+  VUI_COMPONENT   = 0,
+  VUI_PAL         = 1,
+  VUI_NTSC        = 2,
+  VUI_SECAM       = 3,
+  VUI_MAC         = 4,
+  VUI_UNSPECIFIED = 5,
+  VUI_RESERVED1   = 6,   // reserved code
+  VUI_RESERVED2   = 7    // reserved code
+};
+
+/*
+ *  Bit-stream auxiliary reading / writing
+ */
+typedef struct TagBitStringAux {   // cursor state for reading / writing a bitstream buffer
+  uint8_t* pStartBuf;   // start position of the buffer
+  uint8_t* pEndBuf;     // end of the buffer (buffer + length)
+  int32_t  iBits;       // total number of bits of the bitstream input
+
+  intX_t   iIndex;      // only used by CAVLC
+  uint8_t* pCurBuf;     // current reading position
+  uint32_t uiCurBits;   // NOTE(review): presumably the bits currently cached from the stream - confirm
+  int32_t  iLeftBits;   // number of available bits left, in [1, 8];
+  // when 0 bits would be left, point to the start of the next byte and use 8 instead
+} SBitStringAux, *PBitStringAux;
+
+/* NAL Unit Header in AVC, refer to Page 56 in JVT X201wcm */
+typedef struct TagNalUnitHeader {
+  uint8_t             uiForbiddenZeroBit;
+  uint8_t             uiNalRefIdc;                      // presumably holds an EWelsNalRefIdc value (2-bit field) - confirm
+  EWelsNalUnitType    eNalUnitType;
+  uint8_t             uiReservedOneByte;                // padding only, carries no header data
+} SNalUnitHeader, *PNalUnitHeader;
+
+/* NAL Unit Header in scalable extension syntax, refer to Page 390 in JVT X201wcm */
+typedef struct TagNalUnitHeaderExt {
+  SNalUnitHeader      sNalUnitHeader;   // embedded AVC NAL unit header
+
+  // uint8_t   reserved_one_bit;  (kept as a comment only; there is no member for it)
+  bool      bIdrFlag;
+  uint8_t   uiPriorityId;
+  int8_t    iNoInterLayerPredFlag;      // int8_t rather than bool so it can hold 3 values, probably needed by the encoder
+  uint8_t   uiDependencyId;
+
+  uint8_t   uiQualityId;
+  uint8_t   uiTemporalId;
+  bool      bUseRefBasePicFlag;
+  bool      bDiscardableFlag;
+
+  bool      bOutputFlag;
+  uint8_t   uiReservedThree2Bits;
+  // Derived variable(s): filled in by the code rather than taken verbatim from the header syntax (presumably - confirm)
+  uint8_t   uiLayerDqId;
+  bool      bNalExtFlag;
+} SNalUnitHeaderExt, *PNalUnitHeaderExt;
+
+/* AVC MB types: every MB_TYPE_* value is a distinct single bit, so types can be OR-ed into masks */
+#define MB_TYPE_INTRA4x4    0x00000001
+#define MB_TYPE_INTRA16x16  0x00000002
+#define MB_TYPE_INTRA8x8    0x00000004
+#define MB_TYPE_16x16       0x00000008
+#define MB_TYPE_16x8        0x00000010
+#define MB_TYPE_8x16        0x00000020
+#define MB_TYPE_8x8         0x00000040
+#define MB_TYPE_8x8_REF0    0x00000080
+#define MB_TYPE_SKIP        0x00000100
+#define MB_TYPE_INTRA_PCM   0x00000200
+#define MB_TYPE_INTRA_BL    0x00000400
+#define MB_TYPE_DIRECT      0x00000800
+#define MB_TYPE_P0L0        0x00001000   // P<part>L<list> bits: four consecutive bits starting here, selected by IS_DIR()
+#define MB_TYPE_P1L0        0x00002000
+#define MB_TYPE_P0L1        0x00004000
+#define MB_TYPE_P1L1        0x00008000
+#define MB_TYPE_L0        (MB_TYPE_P0L0 | MB_TYPE_P1L0)   // mask of both L0 bits
+#define MB_TYPE_L1        (MB_TYPE_P0L1 | MB_TYPE_P1L1)   // mask of both L1 bits
+
+#define SUB_MB_TYPE_8x8     0x00000001   // sub-MB flags are single bits in their own value space (numerically overlap MB_TYPE_*)
+#define SUB_MB_TYPE_8x4     0x00000002
+#define SUB_MB_TYPE_4x8     0x00000004
+#define SUB_MB_TYPE_4x4     0x00000008
+
+#define MB_TYPE_INTRA     (MB_TYPE_INTRA4x4 | MB_TYPE_INTRA16x16 | MB_TYPE_INTRA8x8 | MB_TYPE_INTRA_PCM)   // mask; does not include MB_TYPE_INTRA_BL
+#define MB_TYPE_INTER     (MB_TYPE_16x16 | MB_TYPE_16x8 | MB_TYPE_8x16 | MB_TYPE_8x8 | MB_TYPE_8x8_REF0 | MB_TYPE_SKIP | MB_TYPE_DIRECT)   // mask of all inter partition / skip / direct bits
+#define IS_INTRA4x4(type) ( MB_TYPE_INTRA4x4 == (type) )   // exact equality, not a bit test: false if any other bit is set
+#define IS_INTRA8x8(type) ( MB_TYPE_INTRA8x8 == (type) )   // exact equality
+#define IS_INTRANxN(type) ( MB_TYPE_INTRA4x4 == (type) || MB_TYPE_INTRA8x8 == (type) )   // exact equality with either value
+#define IS_INTRA16x16(type) ( MB_TYPE_INTRA16x16 == (type) )   // exact equality
+#define IS_INTRA(type) ( (type)&MB_TYPE_INTRA )   // bit test: non-zero (not necessarily 1) when any intra bit is set
+#define IS_INTER(type) ( (type)&MB_TYPE_INTER )   // bit test against the inter mask
+#define IS_INTER_16x16(type) ( (type)&MB_TYPE_16x16 )
+#define IS_INTER_16x8(type) ( (type)&MB_TYPE_16x8 )
+#define IS_INTER_8x16(type) ( (type)&MB_TYPE_8x16 )
+#define IS_TYPE_L0(type) ( (type)&MB_TYPE_L0 )   // non-zero when either L0 bit is set
+#define IS_TYPE_L1(type) ( (type)&MB_TYPE_L1 )   // non-zero when either L1 bit is set
+#define IS_DIR(a, part, list) ((a) & (MB_TYPE_P0L0<<((part)+2*(list))))   // (part, list): (0,0)=P0L0, (1,0)=P1L0, (0,1)=P0L1, (1,1)=P1L1
+
+
+#define IS_SKIP(type) ( (type)&MB_TYPE_SKIP )
+#define IS_DIRECT(type) ( (type)&MB_TYPE_DIRECT )
+#define IS_SVC_INTER(type) IS_INTER(type)   // plain alias of IS_INTER
+#define IS_I_BL(type) ( (type) == MB_TYPE_INTRA_BL )   // exact equality
+#define IS_SVC_INTRA(type) ( IS_I_BL(type) || IS_INTRA(type) )   // intra in the SVC sense: adds MB_TYPE_INTRA_BL to IS_INTRA
+#define IS_Inter_8x8(type) ( (type)&MB_TYPE_8x8)
+#define IS_SUB_8x8(sub_type) ((sub_type)&SUB_MB_TYPE_8x8)   // IS_SUB_* test SUB_MB_TYPE_* bits, not MB_TYPE_* bits
+#define IS_SUB_8x4(sub_type) ((sub_type)&SUB_MB_TYPE_8x4)
+#define IS_SUB_4x8(sub_type) ((sub_type)&SUB_MB_TYPE_4x8)
+#define IS_SUB_4x4(sub_type) ((sub_type)&SUB_MB_TYPE_4x4)
+
+#define REF_NOT_AVAIL   -2  // negative sentinel; NOTE(review): expands to an unparenthesized negative literal
+#define REF_NOT_IN_LIST -1  // intra (negative sentinel, also unparenthesized)
+
+///////// intra16x16 luma prediction modes
+#define I16_PRED_INVALID   -1   // negative, so distinct from every mode value below
+#define I16_PRED_V       0
+#define I16_PRED_H       1
+#define I16_PRED_DC      2
+#define I16_PRED_P       3
+
+#define I16_PRED_DC_L    4   // values 4..6 are additional DC variants (presumably left-only / top-only / constant 128 - confirm)
+#define I16_PRED_DC_T    5
+#define I16_PRED_DC_128  6
+#define I16_PRED_DC_A  7   // one past the last variant above (presumably the table size - confirm)
+////////// intra4x4 luma prediction modes
+// I8x8 prediction also uses these definitions
+#define I4_PRED_INVALID    0   // NOTE(review): same value as I4_PRED_V, unlike the -1 of I16_PRED_INVALID / C_PRED_INVALID - confirm intended
+#define I4_PRED_V        0
+#define I4_PRED_H        1
+#define I4_PRED_DC       2
+#define I4_PRED_DDL      3 // diagonal_down_left
+#define I4_PRED_DDR      4 // diagonal_down_right
+#define I4_PRED_VR       5 // vertical_right
+#define I4_PRED_HD       6 // horizontal_down
+#define I4_PRED_VL       7 // vertical_left
+#define I4_PRED_HU       8 // horizontal_up
+
+#define I4_PRED_DC_L     9    // values 9..11 are additional DC variants (presumably left-only / top-only / constant 128 - confirm)
+#define I4_PRED_DC_T     10
+#define I4_PRED_DC_128   11
+
+#define I4_PRED_DDL_TOP  12 // DDL with the right-top samples replaced by padding the rightmost pixel of top
+#define I4_PRED_VL_TOP   13 // VL with the right-top samples replaced by padding the rightmost pixel of top
+#define I4_PRED_A   14   // one past the last value above (presumably the table size - confirm)
+
+////////// intra chroma prediction modes
+#define C_PRED_INVALID   -1   // negative, so distinct from every mode value below
+#define C_PRED_DC        0    // note: DC is 0 here, whereas I16_PRED_V is 0 for intra16x16 luma
+#define C_PRED_H         1
+#define C_PRED_V         2
+#define C_PRED_P         3
+
+#define C_PRED_DC_L      4   // values 4..6 are additional DC variants (presumably left-only / top-only / constant 128 - confirm)
+#define C_PRED_DC_T      5
+#define C_PRED_DC_128    6
+#define C_PRED_A    7   // one past the last variant above (presumably the table size - confirm)
+}
+#endif//WELS_COMMON_DEFS_H__
--- a/codec/console/dec/src/h264dec.cpp
+++ b/codec/console/dec/src/h264dec.cpp
@@ -1,485 +1,520 @@
-/*!
- * \copy
- *     Copyright (c)  2004-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- * h264dec.cpp:         Wels Decoder Console Implementation file
- */
-
-#if defined (_WIN32)
-#define _CRT_SECURE_NO_WARNINGS
-#include <windows.h>
-#include <tchar.h>
-#else
-#include <string.h>
-#endif
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#if defined (ANDROID_NDK)
-#include <android/log.h>
-#endif
-#include "codec_def.h"
-#include "codec_app_def.h"
-#include "codec_api.h"
-#include "read_config.h"
-#include "typedefs.h"
-#include "measure_time.h"
-#include "d3d9_utils.h"
-
-
-using namespace std;
-
-#if defined (WINDOWS_PHONE)
-double g_dDecTime = 0.0;
-float  g_fDecFPS = 0.0;
-int    g_iDecodedFrameNum = 0;
-#endif
-
-#if defined(ANDROID_NDK)
-#define LOG_TAG "welsdec"
-#define LOGI(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)
-#define printf LOGI
-#define fprintf(a, ...) LOGI(__VA_ARGS__)
-#endif
-//using namespace WelsDec;
-
-void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, const char* kpOuputFileName,
-                         int32_t& iWidth, int32_t& iHeight, const char* pOptionFileName, const char* pLengthFileName,
-                         int32_t iErrorConMethod,
-                         bool bLegacyCalling ) {
-  FILE* pH264File   = NULL;
-  FILE* pYuvFile    = NULL;
-  FILE* pOptionFile = NULL;
-// Lenght input mode support
-  FILE* fpTrack = NULL;
-
-  if (pDecoder == NULL) return;
-
-  int32_t pInfo[4];
-  unsigned long long uiTimeStamp = 0;
-  int64_t iStart = 0, iEnd = 0, iTotal = 0;
-  int32_t iSliceSize;
-  int32_t iSliceIndex = 0;
-  uint8_t* pBuf = NULL;
-  uint8_t uiStartCode[4] = {0, 0, 0, 1};
-
-  uint8_t* pData[3] = {NULL};
-  uint8_t* pDst[3] = {NULL};
-  SBufferInfo sDstBufInfo;
-
-  int32_t iBufPos = 0;
-  int32_t iFileSize;
-  int32_t i = 0;
-  int32_t iLastWidth = 0, iLastHeight = 0;
-  int32_t iFrameCount = 0;
-  int32_t iEndOfStreamFlag = 0;
-  pDecoder->SetOption (DECODER_OPTION_ERROR_CON_IDC, &iErrorConMethod);
-  CUtils cOutputModule;
-  double dElapsed = 0;
-
-  if (kpH264FileName) {
-    pH264File = fopen (kpH264FileName, "rb");
-    if (pH264File == NULL) {
-      fprintf (stderr, "Can not open h264 source file, check its legal path related please..\n");
-      return;
-    }
-    fprintf (stderr, "H264 source file name: %s..\n", kpH264FileName);
-  } else {
-    fprintf (stderr, "Can not find any h264 bitstream file to read..\n");
-    fprintf (stderr, "----------------decoder return------------------------\n");
-    return;
-  }
-
-  if (kpOuputFileName) {
-    pYuvFile = fopen (kpOuputFileName, "wb");
-    if (pYuvFile == NULL) {
-      fprintf (stderr, "Can not open yuv file to output result of decoding..\n");
-      // any options
-      //return; // can let decoder work in quiet mode, no writing any output
-    } else
-      fprintf (stderr, "Sequence output file name: %s..\n", kpOuputFileName);
-  } else {
-    fprintf (stderr, "Can not find any output file to write..\n");
-    // any options
-  }
-
-  if (pOptionFileName) {
-    pOptionFile = fopen (pOptionFileName, "wb");
-    if (pOptionFile == NULL) {
-      fprintf (stderr, "Can not open optional file for write..\n");
-    } else
-      fprintf (stderr, "Extra optional file: %s..\n", pOptionFileName);
-  }
-
-  if (pLengthFileName != NULL) {
-    fpTrack = fopen (pLengthFileName, "rb");
-    if (fpTrack == NULL)
-      printf ("Length file open ERROR!\n");
-  }
-
-  printf ("------------------------------------------------------\n");
-
-  fseek (pH264File, 0L, SEEK_END);
-  iFileSize = (int32_t) ftell (pH264File);
-  if (iFileSize <= 0) {
-    fprintf (stderr, "Current Bit Stream File is too small, read error!!!!\n");
-    goto label_exit;
-  }
-  fseek (pH264File, 0L, SEEK_SET);
-
-  pBuf = new uint8_t[iFileSize + 4];
-  if (pBuf == NULL) {
-    fprintf (stderr, "new buffer failed!\n");
-    goto label_exit;
-  }
-
-  if (fread (pBuf, 1, iFileSize, pH264File) != (uint32_t)iFileSize) {
-    fprintf (stderr, "Unable to read whole file\n");
-    goto label_exit;
-  }
-
-  memcpy (pBuf + iFileSize, &uiStartCode[0], 4); //confirmed_safe_unsafe_usage
-
-  while (true) {
-
-    if (iBufPos >= iFileSize) {
-      iEndOfStreamFlag = true;
-      if (iEndOfStreamFlag)
-        pDecoder->SetOption (DECODER_OPTION_END_OF_STREAM, (void*)&iEndOfStreamFlag);
-      break;
-    }
-// Read length from file if needed
-    if (fpTrack) {
-      if (fread (pInfo, 4, sizeof (int32_t), fpTrack) < 4)
-        goto label_exit;
-      iSliceSize = static_cast<int32_t> (pInfo[2]);
-    } else {
-      for (i = 0; i < iFileSize; i++) {
-        if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
-             && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
-          break;
-        }
-      }
-      iSliceSize = i;
-    }
-    if (iSliceSize < 4) { //too small size, no effective data, ignore
-      iBufPos += iSliceSize;
-      continue;
-    }
-
-//for coverage test purpose
-    int32_t iEndOfStreamFlag;
-    pDecoder->GetOption (DECODER_OPTION_END_OF_STREAM, &iEndOfStreamFlag);
-    int32_t iCurIdrPicId;
-    pDecoder->GetOption (DECODER_OPTION_IDR_PIC_ID, &iCurIdrPicId);
-    int32_t iFrameNum;
-    pDecoder->GetOption (DECODER_OPTION_FRAME_NUM, &iFrameNum);
-    int32_t bCurAuContainLtrMarkSeFlag;
-    pDecoder->GetOption (DECODER_OPTION_LTR_MARKING_FLAG, &bCurAuContainLtrMarkSeFlag);
-    int32_t iFrameNumOfAuMarkedLtr;
-    pDecoder->GetOption (DECODER_OPTION_LTR_MARKED_FRAME_NUM, &iFrameNumOfAuMarkedLtr);
-    int32_t iFeedbackVclNalInAu;
-    pDecoder->GetOption (DECODER_OPTION_VCL_NAL, &iFeedbackVclNalInAu);
-    int32_t iFeedbackTidInAu;
-    pDecoder->GetOption (DECODER_OPTION_TEMPORAL_ID, &iFeedbackTidInAu);
-//~end for
-
-    iStart = WelsTime();
-    pData[0] = NULL;
-    pData[1] = NULL;
-    pData[2] = NULL;
-    uiTimeStamp ++;
-    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
-    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
-    if (!bLegacyCalling) {
-      pDecoder->DecodeFrameNoDelay (pBuf + iBufPos, iSliceSize, pData, &sDstBufInfo);
-    } else {
-      pDecoder->DecodeFrame2 (pBuf + iBufPos, iSliceSize, pData, &sDstBufInfo);
-    }
-
-    if (sDstBufInfo.iBufferStatus == 1) {
-      pDst[0] = pData[0];
-      pDst[1] = pData[1];
-      pDst[2] = pData[2];
-    }
-    iEnd    = WelsTime();
-    iTotal += iEnd - iStart;
-    if (sDstBufInfo.iBufferStatus == 1) {
-      cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
-      iWidth  = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
-      iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
-
-      if (pOptionFile != NULL) {
-        if (iWidth != iLastWidth && iHeight != iLastHeight) {
-          fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
-          fwrite (&iWidth , sizeof (iWidth) , 1, pOptionFile);
-          fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
-          iLastWidth  = iWidth;
-          iLastHeight = iHeight;
-        }
-      }
-      ++ iFrameCount;
-    }
-
-    if (bLegacyCalling) {
-      iStart = WelsTime();
-      pData[0] = NULL;
-      pData[1] = NULL;
-      pData[2] = NULL;
-      memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
-      sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
-      pDecoder->DecodeFrame2 (NULL, 0, pData, &sDstBufInfo);
-      if (sDstBufInfo.iBufferStatus == 1) {
-        pDst[0] = pData[0];
-        pDst[1] = pData[1];
-        pDst[2] = pData[2];
-      }
-      iEnd    = WelsTime();
-      iTotal += iEnd - iStart;
-      if (sDstBufInfo.iBufferStatus == 1) {
-        cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
-        iWidth  = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
-        iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
-
-        if (pOptionFile != NULL) {
-          if (iWidth != iLastWidth && iHeight != iLastHeight) {
-            fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
-            fwrite (&iWidth , sizeof (iWidth) , 1, pOptionFile);
-            fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
-            iLastWidth  = iWidth;
-            iLastHeight = iHeight;
-          }
-        }
-        ++ iFrameCount;
-      }
-    }
-    iBufPos += iSliceSize;
-    ++ iSliceIndex;
-  }
-
-  dElapsed = iTotal / 1e6;
-  fprintf (stderr, "-------------------------------------------------------\n");
-  fprintf (stderr, "iWidth:\t\t%d\nheight:\t\t%d\nFrames:\t\t%d\ndecode time:\t%f sec\nFPS:\t\t%f fps\n",
-           iWidth, iHeight, iFrameCount, dElapsed, (iFrameCount * 1.0) / dElapsed);
-  fprintf (stderr, "-------------------------------------------------------\n");
-
-#if defined (WINDOWS_PHONE)
-  g_dDecTime = dElapsed;
-  g_fDecFPS = (iFrameCount * 1.0f) / (float) dElapsed;
-  g_iDecodedFrameNum = iFrameCount;
-#endif
-
-  // coverity scan uninitial
-label_exit:
-  if (pBuf) {
-    delete[] pBuf;
-    pBuf = NULL;
-  }
-  if (pH264File) {
-    fclose (pH264File);
-    pH264File = NULL;
-  }
-  if (pYuvFile) {
-    fclose (pYuvFile);
-    pYuvFile = NULL;
-  }
-  if (pOptionFile) {
-    fclose (pOptionFile);
-    pOptionFile = NULL;
-  }
-  if (fpTrack) {
-    fclose (fpTrack);
-    fpTrack = NULL;
-  }
-
-}
-
-#if (defined(ANDROID_NDK)||defined(APPLE_IOS) || defined (WINDOWS_PHONE))
-int32_t DecMain (int32_t iArgC, char* pArgV[]) {
-#else
-int32_t main (int32_t iArgC, char* pArgV[]) {
-#endif
-  ISVCDecoder* pDecoder = NULL;
-
-  SDecodingParam sDecParam = {0};
-  string strInputFile (""), strOutputFile (""), strOptionFile (""), strLengthFile ("");
-  int iLevelSetting = (int) WELS_LOG_WARNING;
-  bool bLegacyCalling = false;
-
-  sDecParam.sVideoProperty.size = sizeof (sDecParam.sVideoProperty);
-  sDecParam.eEcActiveIdc = ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE;
-
-  if (iArgC < 2) {
-    printf ("usage 1: h264dec.exe welsdec.cfg\n");
-    printf ("usage 2: h264dec.exe welsdec.264 out.yuv\n");
-    printf ("usage 3: h264dec.exe welsdec.264\n");
-    return 1;
-  } else if (iArgC == 2) {
-    if (strstr (pArgV[1], ".cfg")) { // read config file //confirmed_safe_unsafe_usage
-      CReadConfig cReadCfg (pArgV[1]);
-      string strTag[4];
-      string strReconFile ("");
-
-      if (!cReadCfg.ExistFile()) {
-        printf ("Specified file: %s not exist, maybe invalid path or parameter settting.\n", cReadCfg.GetFileName().c_str());
-        return 1;
-      }
-
-      while (!cReadCfg.EndOfFile()) {
-        long nRd = cReadCfg.ReadLine (&strTag[0]);
-        if (nRd > 0) {
-          if (strTag[0].compare ("InputFile") == 0) {
-            strInputFile = strTag[1];
-          } else if (strTag[0].compare ("OutputFile") == 0) {
-            strOutputFile = strTag[1];
-          } else if (strTag[0].compare ("RestructionFile") == 0) {
-            strReconFile = strTag[1];
-            int32_t iLen = (int32_t)strReconFile.length();
-            sDecParam.pFileNameRestructed = new char[iLen + 1];
-            if (sDecParam.pFileNameRestructed != NULL) {
-              sDecParam.pFileNameRestructed[iLen] = 0;
-            }
-
-            strncpy (sDecParam.pFileNameRestructed, strReconFile.c_str(), iLen); //confirmed_safe_unsafe_usage
-          } else if (strTag[0].compare ("TargetDQID") == 0) {
-            sDecParam.uiTargetDqLayer = (uint8_t)atol (strTag[1].c_str());
-          } else if (strTag[0].compare ("ErrorConcealmentIdc") == 0) {
-            sDecParam.eEcActiveIdc = (ERROR_CON_IDC)atol (strTag[1].c_str());
-          } else if (strTag[0].compare ("CPULoad") == 0) {
-            sDecParam.uiCpuLoad = (uint32_t)atol (strTag[1].c_str());
-          } else if (strTag[0].compare ("VideoBitstreamType") == 0) {
-            sDecParam.sVideoProperty.eVideoBsType = (VIDEO_BITSTREAM_TYPE)atol (strTag[1].c_str());
-          }
-        }
-      }
-      if (strOutputFile.empty()) {
-        printf ("No output file specified in configuration file.\n");
-        return 1;
-      }
-    } else if (strstr (pArgV[1],
-                       ".264")) { // no output dump yuv file, just try to render the decoded pictures //confirmed_safe_unsafe_usage
-      strInputFile = pArgV[1];
-      sDecParam.uiTargetDqLayer = (uint8_t) - 1;
-      sDecParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
-      sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
-    }
-  } else { //iArgC > 2
-    strInputFile = pArgV[1];
-    strOutputFile = pArgV[2];
-    sDecParam.uiTargetDqLayer = (uint8_t) - 1;
-    sDecParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
-    sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
-    if (iArgC > 3) {
-      for (int i = 3; i < iArgC; i++) {
-        char* cmd = pArgV[i];
-
-        if (!strcmp (cmd, "-options")) {
-          if (i + 1 < iArgC)
-            strOptionFile = pArgV[++i];
-          else {
-            printf ("options file not specified.\n");
-            return 1;
-          }
-        } else if (!strcmp (cmd, "-trace")) {
-          if (i + 1 < iArgC)
-            iLevelSetting = atoi (pArgV[++i]);
-          else {
-            printf ("trace level not specified.\n");
-            return 1;
-          }
-        } else if (!strcmp (cmd, "-length")) {
-          if (i + 1 < iArgC)
-            strLengthFile = pArgV[++i];
-          else {
-            printf ("lenght file not specified.\n");
-            return 1;
-          }
-        } else if (!strcmp (cmd, "-ec")) {
-          if (i + 1 < iArgC) {
-            int iEcActiveIdc = atoi (pArgV[++i]);
-            sDecParam.eEcActiveIdc = (ERROR_CON_IDC)iEcActiveIdc;
-            printf ("ERROR_CON(cealment) is set to %d.\n", iEcActiveIdc);
-          }
-        } else if (!strcmp (cmd, "-legacy")) {
-          bLegacyCalling = true;
-        }
-      }
-    }
-
-    if (strOutputFile.empty()) {
-      printf ("No output file specified in configuration file.\n");
-      return 1;
-    }
-  }
-
-  if (strInputFile.empty()) {
-    printf ("No input file specified in configuration file.\n");
-    return 1;
-  }
-
-
-
-
-  if (WelsCreateDecoder (&pDecoder)  || (NULL == pDecoder)) {
-    printf ("Create Decoder failed.\n");
-    return 1;
-  }
-  if (iLevelSetting >= 0) {
-    pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting);
-  }
-
-  if (pDecoder->Initialize (&sDecParam)) {
-    printf ("Decoder initialization failed.\n");
-    return 1;
-  }
-
-
-  int32_t iWidth = 0;
-  int32_t iHeight = 0;
-
-
-  H264DecodeInstance (pDecoder, strInputFile.c_str(), !strOutputFile.empty() ? strOutputFile.c_str() : NULL, iWidth,
-                      iHeight,
-                      (!strOptionFile.empty() ? strOptionFile.c_str() : NULL), (!strLengthFile.empty() ? strLengthFile.c_str() : NULL),
-                      (int32_t)sDecParam.eEcActiveIdc,
-                      bLegacyCalling);
-
-  if (sDecParam.pFileNameRestructed != NULL) {
-    delete []sDecParam.pFileNameRestructed;
-    sDecParam.pFileNameRestructed = NULL;
-  }
-
-  if (pDecoder) {
-    pDecoder->Uninitialize();
-
-    WelsDestroyDecoder (pDecoder);
-  }
-
-  return 0;
-}
+/*!
+ * \copy
+ *     Copyright (c)  2004-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ * h264dec.cpp:         Wels Decoder Console Implementation file
+ */
+
+#if defined (_WIN32)
+#define _CRT_SECURE_NO_WARNINGS
+#include <windows.h>
+#include <tchar.h>
+#else
+#include <string.h>
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#if defined (ANDROID_NDK)
+#include <android/log.h>
+#endif
+#include "codec_def.h"
+#include "codec_app_def.h"
+#include "codec_api.h"
+#include "read_config.h"
+#include "typedefs.h"
+#include "measure_time.h"
+#include "d3d9_utils.h"
+
+
+using namespace std;
+
+#if defined (WINDOWS_PHONE)   // file-scope result variables, only compiled for Windows Phone builds
+double g_dDecTime = 0.0;      // presumably the total decode time in seconds, set by H264DecodeInstance - confirm
+float  g_fDecFPS = 0.0;       // presumably decoded frames per second - confirm
+int    g_iDecodedFrameNum = 0;   // presumably the number of decoded frames - confirm
+#endif // WINDOWS_PHONE
+
+#if defined(ANDROID_NDK)   // on Android, route this file's console output to the Android log
+#define LOG_TAG "welsdec"   // tag passed to every log call
+#define LOGI(...) __android_log_print(ANDROID_LOG_DEBUG, LOG_TAG, __VA_ARGS__)   // logs at ANDROID_LOG_DEBUG priority under LOG_TAG
+#define printf LOGI   // every printf in this file becomes a LOGI call
+#define fprintf(a, ...) LOGI(__VA_ARGS__)   // the stream argument (a) is discarded; only format and arguments are logged
+#endif // ANDROID_NDK
+//using namespace WelsDec;
+
+void H264DecodeInstance (ISVCDecoder* pDecoder, const char* kpH264FileName, const char* kpOuputFileName,
+                         int32_t& iWidth, int32_t& iHeight, const char* pOptionFileName, const char* pLengthFileName,
+                         int32_t iErrorConMethod,
+                         bool bLegacyCalling) {
+  FILE* pH264File   = NULL;
+  FILE* pYuvFile    = NULL;
+  FILE* pOptionFile = NULL;
+// Length input mode support
+  FILE* fpTrack = NULL;
+
+  if (pDecoder == NULL) return;
+
+  int32_t pInfo[4];
+  unsigned long long uiTimeStamp = 0;
+  int64_t iStart = 0, iEnd = 0, iTotal = 0;
+  int32_t iSliceSize;
+  int32_t iSliceIndex = 0;
+  uint8_t* pBuf = NULL;
+  uint8_t uiStartCode[4] = {0, 0, 0, 1};
+
+  uint8_t* pData[3] = {NULL};
+  uint8_t* pDst[3] = {NULL};
+  SBufferInfo sDstBufInfo;
+
+  int32_t iBufPos = 0;
+  int32_t iFileSize;
+  int32_t i = 0;
+  int32_t iLastWidth = 0, iLastHeight = 0;
+  int32_t iFrameCount = 0;
+  int32_t iEndOfStreamFlag = 0;
+  int32_t num_of_frames_in_buffer = 0;
+  pDecoder->SetOption (DECODER_OPTION_ERROR_CON_IDC, &iErrorConMethod);
+  CUtils cOutputModule;
+  double dElapsed = 0;
+
+  if (kpH264FileName) {
+    pH264File = fopen (kpH264FileName, "rb");
+    if (pH264File == NULL) {
+      fprintf (stderr, "Can not open h264 source file, check its legal path related please..\n");
+      return;
+    }
+    fprintf (stderr, "H264 source file name: %s..\n", kpH264FileName);
+  } else {
+    fprintf (stderr, "Can not find any h264 bitstream file to read..\n");
+    fprintf (stderr, "----------------decoder return------------------------\n");
+    return;
+  }
+
+  if (kpOuputFileName) {
+    pYuvFile = fopen (kpOuputFileName, "wb");
+    if (pYuvFile == NULL) {
+      fprintf (stderr, "Can not open yuv file to output result of decoding..\n");
+      // any options
+      //return; // can let decoder work in quiet mode, no writing any output
+    } else
+      fprintf (stderr, "Sequence output file name: %s..\n", kpOuputFileName);
+  } else {
+    fprintf (stderr, "Can not find any output file to write..\n");
+    // any options
+  }
+
+  if (pOptionFileName) {
+    pOptionFile = fopen (pOptionFileName, "wb");
+    if (pOptionFile == NULL) {
+      fprintf (stderr, "Can not open optional file for write..\n");
+    } else
+      fprintf (stderr, "Extra optional file: %s..\n", pOptionFileName);
+  }
+
+  if (pLengthFileName != NULL) {
+    fpTrack = fopen (pLengthFileName, "rb");
+    if (fpTrack == NULL)
+      printf ("Length file open ERROR!\n");
+  }
+
+  printf ("------------------------------------------------------\n");
+
+  fseek (pH264File, 0L, SEEK_END);
+  iFileSize = (int32_t) ftell (pH264File);
+  if (iFileSize <= 0) {
+    fprintf (stderr, "Current Bit Stream File is too small, read error!!!!\n");
+    goto label_exit;
+  }
+  fseek (pH264File, 0L, SEEK_SET);
+
+  pBuf = new uint8_t[iFileSize + 4];
+  if (pBuf == NULL) {
+    fprintf (stderr, "new buffer failed!\n");
+    goto label_exit;
+  }
+
+  if (fread (pBuf, 1, iFileSize, pH264File) != (uint32_t)iFileSize) {
+    fprintf (stderr, "Unable to read whole file\n");
+    goto label_exit;
+  }
+
+  memcpy (pBuf + iFileSize, &uiStartCode[0], 4); //confirmed_safe_unsafe_usage
+
+  while (true) {
+
+    if (iBufPos >= iFileSize) {
+      iEndOfStreamFlag = true;
+      if (iEndOfStreamFlag)
+        pDecoder->SetOption (DECODER_OPTION_END_OF_STREAM, (void*)&iEndOfStreamFlag);
+      break;
+    }
+// Read length from file if needed
+    if (fpTrack) {
+      if (fread (pInfo, 4, sizeof (int32_t), fpTrack) < 4)
+        goto label_exit;
+      iSliceSize = static_cast<int32_t> (pInfo[2]);
+    } else {
+      for (i = 0; i < iFileSize; i++) {
+        if ((pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 0 && pBuf[iBufPos + i + 3] == 1
+             && i > 0) || (pBuf[iBufPos + i] == 0 && pBuf[iBufPos + i + 1] == 0 && pBuf[iBufPos + i + 2] == 1 && i > 0)) {
+          break;
+        }
+      }
+      iSliceSize = i;
+    }
+    if (iSliceSize < 4) { //too small size, no effective data, ignore
+      iBufPos += iSliceSize;
+      continue;
+    }
+
+//for coverage test purpose
+    int32_t iEndOfStreamFlag;
+    pDecoder->GetOption (DECODER_OPTION_END_OF_STREAM, &iEndOfStreamFlag);
+    int32_t iCurIdrPicId;
+    pDecoder->GetOption (DECODER_OPTION_IDR_PIC_ID, &iCurIdrPicId);
+    int32_t iFrameNum;
+    pDecoder->GetOption (DECODER_OPTION_FRAME_NUM, &iFrameNum);
+    int32_t bCurAuContainLtrMarkSeFlag;
+    pDecoder->GetOption (DECODER_OPTION_LTR_MARKING_FLAG, &bCurAuContainLtrMarkSeFlag);
+    int32_t iFrameNumOfAuMarkedLtr;
+    pDecoder->GetOption (DECODER_OPTION_LTR_MARKED_FRAME_NUM, &iFrameNumOfAuMarkedLtr);
+    int32_t iFeedbackVclNalInAu;
+    pDecoder->GetOption (DECODER_OPTION_VCL_NAL, &iFeedbackVclNalInAu);
+    int32_t iFeedbackTidInAu;
+    pDecoder->GetOption (DECODER_OPTION_TEMPORAL_ID, &iFeedbackTidInAu);
+//~end for
+
+    iStart = WelsTime();
+    pData[0] = NULL;
+    pData[1] = NULL;
+    pData[2] = NULL;
+    uiTimeStamp ++;
+    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
+    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
+    if (!bLegacyCalling) {
+      pDecoder->DecodeFrameNoDelay (pBuf + iBufPos, iSliceSize, pData, &sDstBufInfo);
+    } else {
+      pDecoder->DecodeFrame2 (pBuf + iBufPos, iSliceSize, pData, &sDstBufInfo);
+    }
+
+    if (sDstBufInfo.iBufferStatus == 1) {
+      pDst[0] = pData[0];
+      pDst[1] = pData[1];
+      pDst[2] = pData[2];
+    }
+    iEnd    = WelsTime();
+    iTotal += iEnd - iStart;
+    if (sDstBufInfo.iBufferStatus == 1) {
+      cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
+      iWidth  = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
+      iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
+
+      if (pOptionFile != NULL) {
+        if (iWidth != iLastWidth && iHeight != iLastHeight) {
+          fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
+          fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
+          fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
+          iLastWidth  = iWidth;
+          iLastHeight = iHeight;
+        }
+      }
+      ++ iFrameCount;
+    }
+
+    if (bLegacyCalling) {
+      iStart = WelsTime();
+      pData[0] = NULL;
+      pData[1] = NULL;
+      pData[2] = NULL;
+      memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
+      sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
+      pDecoder->DecodeFrame2 (NULL, 0, pData, &sDstBufInfo);
+      if (sDstBufInfo.iBufferStatus == 1) {
+        pDst[0] = pData[0];
+        pDst[1] = pData[1];
+        pDst[2] = pData[2];
+      }
+      iEnd    = WelsTime();
+      iTotal += iEnd - iStart;
+      if (sDstBufInfo.iBufferStatus == 1) {
+        cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
+        iWidth  = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
+        iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
+
+        if (pOptionFile != NULL) {
+          if (iWidth != iLastWidth && iHeight != iLastHeight) {
+            fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
+            fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
+            fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
+            iLastWidth  = iWidth;
+            iLastHeight = iHeight;
+          }
+        }
+        ++ iFrameCount;
+      }
+    }
+    iBufPos += iSliceSize;
+    ++ iSliceIndex;
+  }
+
+  pDecoder->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
+  for (int32_t i = 0; i < num_of_frames_in_buffer; ++i) {
+    iStart = WelsTime();
+    pData[0] = NULL;
+    pData[1] = NULL;
+    pData[2] = NULL;
+    memset (&sDstBufInfo, 0, sizeof (SBufferInfo));
+    sDstBufInfo.uiInBsTimeStamp = uiTimeStamp;
+    sDstBufInfo.iBufferStatus = 1;
+    pDecoder->FlushFrame (pData, &sDstBufInfo);
+    if (sDstBufInfo.iBufferStatus == 1) {
+      pDst[0] = pData[0];
+      pDst[1] = pData[1];
+      pDst[2] = pData[2];
+    }
+    iEnd = WelsTime();
+    iTotal += iEnd - iStart;
+    if (sDstBufInfo.iBufferStatus == 1) {
+      cOutputModule.Process ((void**)pDst, &sDstBufInfo, pYuvFile);
+      iWidth = sDstBufInfo.UsrData.sSystemBuffer.iWidth;
+      iHeight = sDstBufInfo.UsrData.sSystemBuffer.iHeight;
+
+      if (pOptionFile != NULL) {
+        if (iWidth != iLastWidth && iHeight != iLastHeight) {
+          fwrite (&iFrameCount, sizeof (iFrameCount), 1, pOptionFile);
+          fwrite (&iWidth, sizeof (iWidth), 1, pOptionFile);
+          fwrite (&iHeight, sizeof (iHeight), 1, pOptionFile);
+          iLastWidth = iWidth;
+          iLastHeight = iHeight;
+        }
+      }
+      ++iFrameCount;
+    }
+  }
+  dElapsed = iTotal / 1e6;
+  fprintf (stderr, "-------------------------------------------------------\n");
+  fprintf (stderr, "iWidth:\t\t%d\nheight:\t\t%d\nFrames:\t\t%d\ndecode time:\t%f sec\nFPS:\t\t%f fps\n",
+           iWidth, iHeight, iFrameCount, dElapsed, (iFrameCount * 1.0) / dElapsed);
+  fprintf (stderr, "-------------------------------------------------------\n");
+
+#if defined (WINDOWS_PHONE)
+  g_dDecTime = dElapsed;
+  g_fDecFPS = (iFrameCount * 1.0f) / (float) dElapsed;
+  g_iDecodedFrameNum = iFrameCount;
+#endif
+
+  // coverity scan uninitial
+label_exit:
+  if (pBuf) {
+    delete[] pBuf;
+    pBuf = NULL;
+  }
+  if (pH264File) {
+    fclose (pH264File);
+    pH264File = NULL;
+  }
+  if (pYuvFile) {
+    fclose (pYuvFile);
+    pYuvFile = NULL;
+  }
+  if (pOptionFile) {
+    fclose (pOptionFile);
+    pOptionFile = NULL;
+  }
+  if (fpTrack) {
+    fclose (fpTrack);
+    fpTrack = NULL;
+  }
+
+}
+
+// Console entry point of the sample decoder (built as DecMain on Android/iOS/Windows Phone).
+// Accepts "dec.cfg", "in.264", or "in.264 out.yuv [-options f] [-trace n] [-length f] [-ec n] [-legacy]",
+// then creates and initializes a decoder and passes the file names to H264DecodeInstance().
+// Returns 1 on an argument or setup failure and 0 otherwise. Every exit taken after the config
+// file may have allocated pFileNameRestructed goes through label_exit, which releases it and the decoder.
+#if (defined(ANDROID_NDK)||defined(APPLE_IOS) || defined (WINDOWS_PHONE))
+int32_t DecMain (int32_t iArgC, char* pArgV[]) {
+#else
+int32_t main (int32_t iArgC, char* pArgV[]) {
+#endif
+  ISVCDecoder* pDecoder = NULL;
+  SDecodingParam sDecParam = {0};
+  string strInputFile (""), strOutputFile (""), strOptionFile (""), strLengthFile ("");
+  int iLevelSetting = (int) WELS_LOG_WARNING;
+  bool bLegacyCalling = false;
+  // declared before the first goto so that no jump to label_exit bypasses an initialization
+  int32_t iWidth = 0;
+  int32_t iHeight = 0;
+  int32_t iRet = 1;
+  bool bInitialized = false;
+
+  sDecParam.sVideoProperty.size = sizeof (sDecParam.sVideoProperty);
+  sDecParam.eEcActiveIdc = ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE;
+
+  if (iArgC < 2) {
+    printf ("usage 1: h264dec.exe welsdec.cfg\n");
+    printf ("usage 2: h264dec.exe welsdec.264 out.yuv\n");
+    printf ("usage 3: h264dec.exe welsdec.264\n");
+    return 1;
+  } else if (iArgC == 2) {
+    if (strstr (pArgV[1], ".cfg")) { // read config file //confirmed_safe_unsafe_usage
+      CReadConfig cReadCfg (pArgV[1]);
+      string strTag[4];
+      string strReconFile ("");
+
+      if (!cReadCfg.ExistFile()) {
+        printf ("Specified file: %s not exist, maybe invalid path or parameter settting.\n", cReadCfg.GetFileName().c_str());
+        return 1;
+      }
+
+      while (!cReadCfg.EndOfFile()) {
+        long nRd = cReadCfg.ReadLine (&strTag[0]);
+        if (nRd > 0) {
+          if (strTag[0].compare ("InputFile") == 0) {
+            strInputFile = strTag[1];
+          } else if (strTag[0].compare ("OutputFile") == 0) {
+            strOutputFile = strTag[1];
+          } else if (strTag[0].compare ("RestructionFile") == 0) {
+            strReconFile = strTag[1];
+            int32_t iLen = (int32_t)strReconFile.length();
+            delete []sDecParam.pFileNameRestructed; // a repeated tag must not leak the previous buffer
+            sDecParam.pFileNameRestructed = new char[iLen + 1];
+            if (sDecParam.pFileNameRestructed != NULL) {
+              strncpy (sDecParam.pFileNameRestructed, strReconFile.c_str(), iLen); //confirmed_safe_unsafe_usage
+              sDecParam.pFileNameRestructed[iLen] = 0; // strncpy does not terminate when it copies exactly iLen chars
+            }
+          } else if (strTag[0].compare ("TargetDQID") == 0) {
+            sDecParam.uiTargetDqLayer = (uint8_t)atol (strTag[1].c_str());
+          } else if (strTag[0].compare ("ErrorConcealmentIdc") == 0) {
+            sDecParam.eEcActiveIdc = (ERROR_CON_IDC)atol (strTag[1].c_str());
+          } else if (strTag[0].compare ("CPULoad") == 0) {
+            sDecParam.uiCpuLoad = (uint32_t)atol (strTag[1].c_str());
+          } else if (strTag[0].compare ("VideoBitstreamType") == 0) {
+            sDecParam.sVideoProperty.eVideoBsType = (VIDEO_BITSTREAM_TYPE)atol (strTag[1].c_str());
+          }
+        }
+      }
+      if (strOutputFile.empty()) {
+        printf ("No output file specified in configuration file.\n");
+        goto label_exit; // pFileNameRestructed may already be allocated
+      }
+    } else if (strstr (pArgV[1], ".264")) { // no output dump yuv file, just try to render the decoded pictures //confirmed_safe_unsafe_usage
+      strInputFile = pArgV[1];
+      sDecParam.uiTargetDqLayer = (uint8_t) - 1;
+      sDecParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
+      sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+    }
+  } else { //iArgC > 2
+    strInputFile = pArgV[1];
+    strOutputFile = pArgV[2];
+    sDecParam.uiTargetDqLayer = (uint8_t) - 1;
+    sDecParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
+    sDecParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+    if (iArgC > 3) {
+      for (int i = 3; i < iArgC; i++) {
+        char* cmd = pArgV[i];
+
+        if (!strcmp (cmd, "-options")) {
+          if (i + 1 < iArgC)
+            strOptionFile = pArgV[++i];
+          else {
+            printf ("options file not specified.\n");
+            return 1;
+          }
+        } else if (!strcmp (cmd, "-trace")) {
+          if (i + 1 < iArgC)
+            iLevelSetting = atoi (pArgV[++i]);
+          else {
+            printf ("trace level not specified.\n");
+            return 1;
+          }
+        } else if (!strcmp (cmd, "-length")) {
+          if (i + 1 < iArgC)
+            strLengthFile = pArgV[++i];
+          else {
+            printf ("lenght file not specified.\n");
+            return 1;
+          }
+        } else if (!strcmp (cmd, "-ec")) {
+          if (i + 1 < iArgC) {
+            int iEcActiveIdc = atoi (pArgV[++i]);
+            sDecParam.eEcActiveIdc = (ERROR_CON_IDC)iEcActiveIdc;
+            printf ("ERROR_CON(cealment) is set to %d.\n", iEcActiveIdc);
+          }
+        } else if (!strcmp (cmd, "-legacy")) {
+          bLegacyCalling = true;
+        }
+      }
+    }
+
+    if (strOutputFile.empty()) {
+      printf ("No output file specified in configuration file.\n");
+      return 1;
+    }
+  }
+
+  if (strInputFile.empty()) {
+    printf ("No input file specified in configuration file.\n");
+    goto label_exit;
+  }
+
+  if (WelsCreateDecoder (&pDecoder)  || (NULL == pDecoder)) {
+    printf ("Create Decoder failed.\n");
+    goto label_exit;
+  }
+  if (iLevelSetting >= 0) {
+    pDecoder->SetOption (DECODER_OPTION_TRACE_LEVEL, &iLevelSetting);
+  }
+
+  if (pDecoder->Initialize (&sDecParam)) {
+    printf ("Decoder initialization failed.\n");
+    goto label_exit; // the decoder object still has to be destroyed
+  }
+  bInitialized = true;
+
+  H264DecodeInstance (pDecoder, strInputFile.c_str(), !strOutputFile.empty() ? strOutputFile.c_str() : NULL, iWidth, iHeight,
+                      (!strOptionFile.empty() ? strOptionFile.c_str() : NULL), (!strLengthFile.empty() ? strLengthFile.c_str() : NULL),
+                      (int32_t)sDecParam.eEcActiveIdc, bLegacyCalling);
+  iRet = 0;
+
+label_exit:
+  if (sDecParam.pFileNameRestructed != NULL) {
+    delete []sDecParam.pFileNameRestructed;
+    sDecParam.pFileNameRestructed = NULL;
+  }
+  if (pDecoder) {
+    if (bInitialized) { // Uninitialize() is only paired with a successful Initialize()
+      pDecoder->Uninitialize();
+    }
+    WelsDestroyDecoder (pDecoder);
+  }
+  return iRet;
+}
--- a/codec/decoder/core/inc/cabac_decoder.h
+++ b/codec/decoder/core/inc/cabac_decoder.h
@@ -80,7 +80,7 @@
 
 
 //1. CABAC context initialization
-void WelsCabacGlobalInit(PWelsDecoderContext pCabacCtx);
+void WelsCabacGlobalInit (PWelsDecoderContext pCabacCtx);
 void WelsCabacContextInit (PWelsDecoderContext  pCtx, uint8_t eSliceType, int32_t iCabacInitIdc, int32_t iQp);
 
 //2. decoding Engine initialization
--- a/codec/decoder/core/inc/deblocking.h
+++ b/codec/decoder/core/inc/deblocking.h
@@ -78,6 +78,7 @@
  */
 
 uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy);
+uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy);
 
 int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc);
 
--- a/codec/decoder/core/inc/dec_frame.h
+++ b/codec/decoder/core/inc/dec_frame.h
@@ -1,150 +1,156 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- */
-
-//dec_frame.h
-#ifndef WELS_DEC_FRAME_H__
-#define WELS_DEC_FRAME_H__
-
-#include "typedefs.h"
-#include "wels_const.h"
-#include "wels_common_basis.h"
-#include "parameter_sets.h"
-#include "nal_prefix.h"
-#include "slice.h"
-#include "picture.h"
-#include "bit_stream.h"
-#include "fmo.h"
-
-namespace WelsDec {
-
-///////////////////////////////////DQ Layer level///////////////////////////////////
-typedef struct TagDqLayer       SDqLayer;
-typedef SDqLayer*               PDqLayer;
-typedef struct TagLayerInfo {
-  SNalUnitHeaderExt             sNalHeaderExt;
-  SSlice                        sSliceInLayer;  // Here Slice identify to Frame on concept
-  PSubsetSps                    pSubsetSps;     // current pSubsetSps used, memory alloc in external
-  PSps                          pSps;           // current sps based avc used, memory alloc in external
-  PPps                          pPps;           // current pps used
-} SLayerInfo, *PLayerInfo;
-/* Layer Representation */
-
-struct TagDqLayer {
-  SLayerInfo                    sLayerInfo;
-
-  PBitStringAux                 pBitStringAux;  // pointer to SBitStringAux
-  PFmo                          pFmo;           // Current fmo context pointer used
-  int16_t* pMbType;
-  int32_t* pSliceIdc;                           // using int32_t for slice_idc
-  int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
-  int16_t (*pMvd[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
-  int8_t  (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM];
-  bool*    pNoSubMbPartSizeLessThan8x8Flag;
-  bool*    pTransformSize8x8Flag;
-  int8_t*  pLumaQp;
-  int8_t (*pChromaQp)[2];
-  int8_t*  pCbp;
-  uint16_t *pCbfDc;
-  int8_t (*pNzc)[24];
-  int8_t (*pNzcRs)[24];
-  int8_t*  pResidualPredFlag;
-  int8_t*  pInterPredictionDoneFlag;
-  bool*    pMbCorrectlyDecodedFlag;
-  bool*    pMbRefConcealedFlag;
-  int16_t (*pScaledTCoeff)[MB_COEFF_LIST_SIZE];
-  int8_t (*pIntraPredMode)[8];  //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
-  int8_t (*pIntra4x4FinalMode)[MB_BLOCK4x4_NUM];
-  uint8_t  *pIntraNxNAvailFlag;
-  int8_t*  pChromaPredMode;
-  //uint8_t (*motion_pred_flag[LIST_A])[MB_PARTITION_SIZE]; // 8x8
-  int8_t (*pSubMbType)[MB_SUB_PARTITION_SIZE];
-  int32_t iLumaStride;
-  int32_t iChromaStride;
-  uint8_t* pPred[3];
-  int32_t iMbX;
-  int32_t iMbY;
-  int32_t iMbXyIndex;
-  int32_t iMbWidth;               // MB width of this picture, equal to sSps.iMbWidth
-  int32_t iMbHeight;              // MB height of this picture, equal to sSps.iMbHeight;
-
-  /* Common syntax elements across all slices of a DQLayer */
-  int32_t                   iSliceIdcBackup;
-  uint32_t                  uiSpsId;
-  uint32_t                  uiPpsId;
-  uint32_t                  uiDisableInterLayerDeblockingFilterIdc;
-  int32_t                   iInterLayerSliceAlphaC0Offset;
-  int32_t                   iInterLayerSliceBetaOffset;
-  //SPosOffset              sScaledRefLayer;
-  int32_t                   iSliceGroupChangeCycle;
-
-  PRefPicListReorderSyn     pRefPicListReordering;
-  PPredWeightTabSyn         pPredWeightTable;
-  PRefPicMarking            pRefPicMarking; // Decoded reference picture marking syntaxs
-  PRefBasePicMarking        pRefPicBaseMarking;
-
-  PPicture                  pRef;                   // reference picture pointer
-  PPicture                  pDec;                   // reconstruction picture pointer for layer
-
-  bool                      bUseWeightPredictionFlag;
-  bool                      bStoreRefBasePicFlag;                           // iCurTid == 0 && iCurQid = 0 && bEncodeKeyPic = 1
-  bool                      bTCoeffLevelPredFlag;
-  bool                      bConstrainedIntraResamplingFlag;
-  uint8_t                   uiRefLayerDqId;
-  uint8_t                   uiRefLayerChromaPhaseXPlus1Flag;
-  uint8_t                   uiRefLayerChromaPhaseYPlus1;
-  uint8_t                   uiLayerDqId;                    // dq_id of current layer
-  bool                      bUseRefBasePicFlag;     // whether reference pic or reference base pic is referred?
-};
-
-typedef struct TagGpuAvcLayer {
-  SLayerInfo                sLayerInfo;
-  PBitStringAux             pBitStringAux;  // pointer to SBitStringAux
-
-  int16_t*                  pMbType;
-  int32_t*                  pSliceIdc;      // using int32_t for slice_idc
-  int8_t*                   pLumaQp;
-  int8_t*                   pCbp;
-  int8_t                    (*pNzc)[24];
-  int8_t                    (*pIntraPredMode)[8];     //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
-  int32_t                   iMbX;
-  int32_t                   iMbY;
-  int32_t                   iMbXyIndex;
-  int32_t                   iMbWidth;               // MB width of this picture, equal to sSps.iMbWidth
-  int32_t                   iMbHeight;              // MB height of this picture, equal to sSps.iMbHeight;
-
-} SGpuAvcDqLayer, *PGpuAvcDqLayer;
-
-///////////////////////////////////////////////////////////////////////
-
-} // namespace WelsDec
-
-#endif//WELS_DEC_FRAME_H__
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+//dec_frame.h
+#ifndef WELS_DEC_FRAME_H__
+#define WELS_DEC_FRAME_H__
+
+#include "typedefs.h"
+#include "wels_const.h"
+#include "wels_common_basis.h"
+#include "parameter_sets.h"
+#include "nal_prefix.h"
+#include "slice.h"
+#include "picture.h"
+#include "bit_stream.h"
+#include "fmo.h"
+
+namespace WelsDec {
+
+///////////////////////////////////DQ Layer level///////////////////////////////////
+typedef struct TagDqLayer       SDqLayer;
+typedef SDqLayer*               PDqLayer;
+typedef struct TagLayerInfo {
+  SNalUnitHeaderExt             sNalHeaderExt;
+  SSlice                        sSliceInLayer;  // slice of this layer; conceptually stands for the frame here (TODO confirm)
+  PSubsetSps                    pSubsetSps;     // subset SPS currently in use; memory is allocated externally
+  PSps                          pSps;           // AVC-based SPS currently in use; memory is allocated externally
+  PPps                          pPps;           // PPS currently in use
+} SLayerInfo, *PLayerInfo;
+/* Layer Representation: layer info, pointers to MB-level data (types, MVs, ref indices, QPs, coefficients),
+ * current MB position, syntax elements common to all slices, and reference/reconstruction pictures. */
+struct TagDqLayer {
+  SLayerInfo                    sLayerInfo;
+
+  PBitStringAux                 pBitStringAux;  // pointer to SBitStringAux
+  PFmo                          pFmo;           // Current fmo context pointer used
+  uint32_t* pMbType;
+  int32_t* pSliceIdc;                           // using int32_t for slice_idc
+  int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
+  int16_t (*pMvd[LIST_A])[MB_BLOCK4x4_NUM][MV_A];
+  int8_t  (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM];
+  int8_t  (*pDirect)[MB_BLOCK4x4_NUM];          // NOTE(review): presumably direct-mode info per 4x4 block for B slices -- confirm
+  bool*    pNoSubMbPartSizeLessThan8x8Flag;
+  bool*    pTransformSize8x8Flag;
+  int8_t*  pLumaQp;
+  int8_t (*pChromaQp)[2];
+  int8_t*  pCbp;
+  uint16_t *pCbfDc;
+  int8_t (*pNzc)[24];
+  int8_t (*pNzcRs)[24];
+  int8_t*  pResidualPredFlag;
+  int8_t*  pInterPredictionDoneFlag;
+  bool*    pMbCorrectlyDecodedFlag;
+  bool*    pMbRefConcealedFlag;
+  int16_t (*pScaledTCoeff)[MB_COEFF_LIST_SIZE];
+  int8_t (*pIntraPredMode)[8];  //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
+  int8_t (*pIntra4x4FinalMode)[MB_BLOCK4x4_NUM];
+  uint8_t  *pIntraNxNAvailFlag;
+  int8_t*  pChromaPredMode;
+  //uint8_t (*motion_pred_flag[LIST_A])[MB_PARTITION_SIZE]; // 8x8
+  uint32_t (*pSubMbType)[MB_SUB_PARTITION_SIZE];
+  int32_t iLumaStride;
+  int32_t iChromaStride;
+  uint8_t* pPred[3];
+  int32_t iMbX;
+  int32_t iMbY;
+  int32_t iMbXyIndex;
+  int32_t iMbWidth;               // MB width of this picture, equal to sSps.iMbWidth
+  int32_t iMbHeight;              // MB height of this picture, equal to sSps.iMbHeight;
+
+  /* Common syntax elements across all slices of a DQLayer */
+  int32_t                   iSliceIdcBackup;
+  uint32_t                  uiSpsId;
+  uint32_t                  uiPpsId;
+  uint32_t                  uiDisableInterLayerDeblockingFilterIdc;
+  int32_t                   iInterLayerSliceAlphaC0Offset;
+  int32_t                   iInterLayerSliceBetaOffset;
+  //SPosOffset              sScaledRefLayer;
+  int32_t                   iSliceGroupChangeCycle;
+
+  PRefPicListReorderSyn     pRefPicListReordering;
+  PPredWeightTabSyn         pPredWeightTable;
+  PRefPicMarking            pRefPicMarking; // Decoded reference picture marking syntax
+  PRefBasePicMarking        pRefPicBaseMarking;
+
+  PPicture                  pRef;                   // reference picture pointer
+  PPicture                  pDec;                   // reconstruction picture pointer for layer
+
+  int16_t                   iColocMv[2][16][2];     // Colocated MV cache
+  int8_t                    iColocRefIndex[2][16];  // Colocated RefIndex cache
+  int8_t                    iColocIntra[16];        // Colocated Intra cache
+
+  bool                      bUseWeightPredictionFlag;
+  bool                      bUseWeightedBiPredIdc;
+  bool                      bStoreRefBasePicFlag;                           // iCurTid == 0 && iCurQid = 0 && bEncodeKeyPic = 1
+  bool                      bTCoeffLevelPredFlag;
+  bool                      bConstrainedIntraResamplingFlag;
+  uint8_t                   uiRefLayerDqId;
+  uint8_t                   uiRefLayerChromaPhaseXPlus1Flag;
+  uint8_t                   uiRefLayerChromaPhaseYPlus1;
+  uint8_t                   uiLayerDqId;                    // dq_id of current layer
+  bool                      bUseRefBasePicFlag;     // whether reference pic or reference base pic is referred?
+};
+
+/* Reduced layer record: layer info, bitstream pointer and a subset of the MB-level members of TagDqLayer. */
+typedef struct TagGpuAvcLayer {
+  SLayerInfo                sLayerInfo;
+  PBitStringAux             pBitStringAux;  // pointer to SBitStringAux
+
+  uint32_t*                 pMbType;
+  int32_t*                  pSliceIdc;      // using int32_t for slice_idc
+  int8_t*                   pLumaQp;
+  int8_t*                   pCbp;
+  int8_t                    (*pNzc)[24];
+  int8_t                    (*pIntraPredMode)[8];     //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
+  int32_t                   iMbX;
+  int32_t                   iMbY;
+  int32_t                   iMbXyIndex;
+  int32_t                   iMbWidth;               // MB width of this picture, equal to sSps.iMbWidth
+  int32_t                   iMbHeight;              // MB height of this picture, equal to sSps.iMbHeight;
+} SGpuAvcDqLayer, *PGpuAvcDqLayer;
+
+///////////////////////////////////////////////////////////////////////
+
+} // namespace WelsDec
+
+#endif//WELS_DEC_FRAME_H__
--- a/codec/decoder/core/inc/dec_golomb.h
+++ b/codec/decoder/core/inc/dec_golomb.h
@@ -89,9 +89,9 @@
 
 // for data sharing cross modules and try to reduce size of binary generated, 12/10/2009
 extern const uint8_t g_kuiIntra4x4CbpTable[48];
-    extern const uint8_t g_kuiIntra4x4CbpTable400[16];
+extern const uint8_t g_kuiIntra4x4CbpTable400[16];
 extern const uint8_t g_kuiInterCbpTable[48];
-    extern const uint8_t g_kuiInterCbpTable400[16];
+extern const uint8_t g_kuiInterCbpTable400[16];
 
 extern const uint8_t g_kuiLeadingZeroTable[256];
 
@@ -236,7 +236,7 @@
 /*
  *      Check whether there is more rbsp data for processing
  */
-static inline bool CheckMoreRBSPData(PBitStringAux pBsAux) {
+static inline bool CheckMoreRBSPData (PBitStringAux pBsAux) {
   if ((pBsAux->iBits - ((pBsAux->pCurBuf - pBsAux->pStartBuf - 2) << 3) - pBsAux->iLeftBits) > 1) {
     return true;
   } else {
--- a/codec/decoder/core/inc/decode_slice.h
+++ b/codec/decoder/core/inc/decode_slice.h
@@ -44,10 +44,12 @@
 int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
 typedef int32_t (*PWelsDecMbFunc) (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
 
-int32_t WelsDecodeMbCabacISlice(PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
-int32_t WelsDecodeMbCabacPSlice(PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
-int32_t WelsDecodeMbCabacISliceBaseMode0(PWelsDecoderContext pCtx, uint32_t& uiEosFlag);
-int32_t WelsDecodeMbCabacPSliceBaseMode0(PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag);
+int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag);
 
 int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx); //construction based on slice
 
@@ -59,9 +61,10 @@
 int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
                                        uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC);
 int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer);
-void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp,PWelsDecoderContext pCtx);
+void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx);
 int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer);
 void WelsChromaDcIdct (int16_t* pBlock);
+bool ComputeColocated (PWelsDecoderContext pCtx);
 
 #ifdef __cplusplus
 extern "C" {
@@ -68,18 +71,18 @@
 #endif//__cplusplus
 
 #if defined(X86_ASM)
-void WelsBlockZero16x16_sse2(int16_t * block, int32_t stride);
-void WelsBlockZero8x8_sse2(int16_t * block, int32_t stride);
+void WelsBlockZero16x16_sse2 (int16_t* block, int32_t stride);
+void WelsBlockZero8x8_sse2 (int16_t* block, int32_t stride);
 #endif
 
 #if defined(HAVE_NEON)
-void WelsBlockZero16x16_neon(int16_t * block, int32_t stride);
-void WelsBlockZero8x8_neon(int16_t * block, int32_t stride);
+void WelsBlockZero16x16_neon (int16_t* block, int32_t stride);
+void WelsBlockZero8x8_neon (int16_t* block, int32_t stride);
 #endif
 
 #if defined(HAVE_NEON_AARCH64)
-void WelsBlockZero16x16_AArch64_neon(int16_t * block, int32_t stride);
-void WelsBlockZero8x8_AArch64_neon(int16_t * block, int32_t stride);
+void WelsBlockZero16x16_AArch64_neon (int16_t* block, int32_t stride);
+void WelsBlockZero8x8_AArch64_neon (int16_t* block, int32_t stride);
 #endif
 #ifdef __cplusplus
 }
@@ -86,8 +89,8 @@
 #endif//__cplusplus
 
 void WelsBlockFuncInit (SBlockFunc* pFunc,  int32_t iCpu);
-void WelsBlockZero16x16_c(int16_t * block, int32_t stride);
-void WelsBlockZero8x8_c(int16_t * block, int32_t stride);
+void WelsBlockZero16x16_c (int16_t* block, int32_t stride);
+void WelsBlockZero8x8_c (int16_t* block, int32_t stride);
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/decoder.h
+++ b/codec/decoder/core/inc/decoder.h
@@ -111,7 +111,8 @@
 /*
  *  request memory blocks for decoder avc part
  */
-int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight, bool& bReallocFlag);
+int32_t WelsRequestMem (PWelsDecoderContext pCtx, const int32_t kiMbWidth, const int32_t kiMbHeight,
+                        bool& bReallocFlag);
 
 
 /*
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -81,6 +81,7 @@
 #define NEW_CTX_OFFSET_MB_TYPE_I 3
 #define NEW_CTX_OFFSET_SKIP 11
 #define NEW_CTX_OFFSET_SUBMB_TYPE 21
+#define NEW_CTX_OFFSET_B_SUBMB_TYPE 36
 #define NEW_CTX_OFFSET_MVD 40
 #define NEW_CTX_OFFSET_REF_NO 54
 #define NEW_CTX_OFFSET_DELTA_QP 60
@@ -234,7 +235,8 @@
 typedef struct TagWelsDecoderContext {
   SLogContext sLogCtx;
 // Input
-  void*                         pArgDec;                        // structured arguments for decoder, reserved here for extension in the future
+  void*
+  pArgDec;                        // structured arguments for decoder, reserved here for extension in the future
 
   SDataBuffer                   sRawData;
   SDataBuffer                   sSavedData; //for parse only purpose
@@ -248,8 +250,10 @@
 
   int32_t                       iImgWidthInPixel;       // width of image in pixel reconstruction picture to be output
   int32_t                       iImgHeightInPixel;// height of image in pixel reconstruction picture to be output
-  int32_t                       iLastImgWidthInPixel;   // width of image in last successful pixel reconstruction picture to be output
-  int32_t                       iLastImgHeightInPixel;// height of image in last successful pixel reconstruction picture to be output
+  int32_t
+  iLastImgWidthInPixel;   // width of image in last successful pixel reconstruction picture to be output
+  int32_t
+  iLastImgHeightInPixel;// height of image in last successful pixel reconstruction picture to be output
   bool bFreezeOutput; // indicating current frame freezing. Default: true
 
 
@@ -256,8 +260,10 @@
 // Derived common elements
   SNalUnitHeader                sCurNalHead;
   EWelsSliceType                eSliceType;                     // Slice type
+  bool                          bUsedAsRef;                     //flag as ref
   int32_t                       iFrameNum;
-  int32_t                       iPrevFrameNum;          // frame number of previous frame well decoded for non-truncated mode yet
+  int32_t
+  iPrevFrameNum;          // frame number of previous frame well decoded for non-truncated mode yet
   bool                          bLastHasMmco5;      //
   int32_t                       iErrorCode;                     // error code return while decoding in case packets lost
   SFmo                          sFmoList[MAX_PPS_COUNT];        // list for FMO storage
@@ -269,25 +275,26 @@
   iDecBlockOffsetArray[24];     // address talbe for sub 4x4 block in intra4x4_mb, so no need to caculta the address every time.
 
   struct {
-    int16_t*  pMbType[LAYER_NUM_EXCHANGEABLE];                      /* mb type */
+    uint32_t*  pMbType[LAYER_NUM_EXCHANGEABLE];                      /* mb type */
     int16_t (*pMv[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE   MB_BLOCK4x4_NUM*]
     int8_t (*pRefIndex[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM];
+    int8_t (*pDirect[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM];
     bool*   pNoSubMbPartSizeLessThan8x8Flag[LAYER_NUM_EXCHANGEABLE];
     bool*   pTransformSize8x8Flag[LAYER_NUM_EXCHANGEABLE];
     int8_t* pLumaQp[LAYER_NUM_EXCHANGEABLE];        /*mb luma_qp*/
-    int8_t  (*pChromaQp[LAYER_NUM_EXCHANGEABLE])[2];                                        /*mb chroma_qp*/
+    int8_t (*pChromaQp[LAYER_NUM_EXCHANGEABLE])[2];                                         /*mb chroma_qp*/
     int16_t (*pMvd[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_BLOCK4x4_NUM][MV_A]; //[LAYER_NUM_EXCHANGEABLE   MB_BLOCK4x4_NUM*]
     uint16_t* pCbfDc[LAYER_NUM_EXCHANGEABLE];
-    int8_t  (*pNzc[LAYER_NUM_EXCHANGEABLE])[24];
-    int8_t  (*pNzcRs[LAYER_NUM_EXCHANGEABLE])[24];
+    int8_t (*pNzc[LAYER_NUM_EXCHANGEABLE])[24];
+    int8_t (*pNzcRs[LAYER_NUM_EXCHANGEABLE])[24];
     int16_t (*pScaledTCoeff[LAYER_NUM_EXCHANGEABLE])[MB_COEFF_LIST_SIZE]; /*need be aligned*/
-    int8_t  (*pIntraPredMode[LAYER_NUM_EXCHANGEABLE])[8]; //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
+    int8_t (*pIntraPredMode[LAYER_NUM_EXCHANGEABLE])[8];  //0~3 top4x4 ; 4~6 left 4x4; 7 intra16x16
     int8_t (*pIntra4x4FinalMode[LAYER_NUM_EXCHANGEABLE])[MB_BLOCK4x4_NUM];
     uint8_t* pIntraNxNAvailFlag[LAYER_NUM_EXCHANGEABLE];
     int8_t*  pChromaPredMode[LAYER_NUM_EXCHANGEABLE];
     int8_t*  pCbp[LAYER_NUM_EXCHANGEABLE];
     uint8_t (*pMotionPredFlag[LAYER_NUM_EXCHANGEABLE][LIST_A])[MB_PARTITION_SIZE]; // 8x8
-    int8_t (*pSubMbType[LAYER_NUM_EXCHANGEABLE])[MB_SUB_PARTITION_SIZE];
+    uint32_t (*pSubMbType[LAYER_NUM_EXCHANGEABLE])[MB_SUB_PARTITION_SIZE];
     int32_t* pSliceIdc[LAYER_NUM_EXCHANGEABLE];         // using int32_t for slice_idc
     int8_t*  pResidualPredFlag[LAYER_NUM_EXCHANGEABLE];
     int8_t*  pInterPredictionDoneFlag[LAYER_NUM_EXCHANGEABLE];
@@ -300,6 +307,9 @@
 // reconstruction picture
   PPicture                      pDec;                   //pointer to current picture being reconstructed
 
+  PPicture
+  pTempDec;               //pointer to temp decoder picture to be used only for Bi Prediction.
+
 // reference pictures
   SRefPic                       sRefPic;
 
@@ -316,7 +326,7 @@
   SPps                          sPpsBuffer[MAX_PPS_COUNT + 1];
   PSliceHeader                  pSliceHeader;
 
-  PPicBuff                      pPicBuff[LIST_A];       // Initially allocated memory for pictures which are used in decoding.
+  PPicBuff                      pPicBuff;       // Initially allocated memory for pictures which are used in decoding.
   int32_t                       iPicQueueNumber;
 
   SSubsetSps                    sSubsetSpsBuffer[MAX_SPS_COUNT + 1];
@@ -327,7 +337,8 @@
   PSps                          pSps;   // used by current AU
   PPps                          pPps;   // used by current AU
 // Memory for pAccessUnitList is dynamically held till decoder destruction.
-  PDqLayer                      pCurDqLayer;            // current DQ layer representation, also carry reference base layer if applicable
+  PDqLayer
+  pCurDqLayer;            // current DQ layer representation, also carry reference base layer if applicable
   PDqLayer                      pDqLayersList[LAYER_NUM_EXCHANGEABLE];  // DQ layers list with memory allocated
 
   int32_t                       iPicWidthReq;             // picture width have requested the memory
@@ -365,7 +376,8 @@
 #ifdef LONG_TERM_REF
   bool                          bParamSetsLostFlag;     //sps or pps do not exist or not correct
 
-  bool                          bCurAuContainLtrMarkSeFlag; //current AU has the LTR marking syntax element, mark the previous frame or self
+  bool
+  bCurAuContainLtrMarkSeFlag; //current AU has the LTR marking syntax element, mark the previous frame or self
   int32_t                       iFrameNumOfAuMarkedLtr; //if bCurAuContainLtrMarkSeFlag==true, SHOULD set this variable
 
   uint16_t                      uiCurIdrPicId;
@@ -427,6 +439,9 @@
 //Save the last nal header info
   SNalUnitHeaderExt sLastNalHdrExt;
   SSliceHeader      sLastSliceHeader;
+  int32_t           iPrevPicOrderCntMsb;
+  int32_t           iPrevPicOrderCntLsb;
+
   SWelsCabacCtx sWelsCabacContexts[4][WELS_QP_MAX + 1][WELS_CONTEXT_COUNT];
   bool bCabacInited;
   SWelsCabacCtx   pCabacCtx[WELS_CONTEXT_COUNT];
--- a/codec/decoder/core/inc/decoder_core.h
+++ b/codec/decoder/core/inc/decoder_core.h
@@ -170,7 +170,7 @@
 
 bool CheckRefPicturesComplete (PWelsDecoderContext pCtx); // Check whether all ref pictures are complete
 
-void ForceResetParaSetStatusAndAUList(PWelsDecoderContext pCtx);
+void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx);
 } // namespace WelsDec
 
 #endif//WELS_DECODER_CORE_H__
--- a/codec/decoder/core/inc/error_code.h
+++ b/codec/decoder/core/inc/error_code.h
@@ -43,12 +43,12 @@
 namespace WelsDec {
 
 typedef enum TagWelsErr {
-ERR_NONE                = 0,
-ERR_INVALID_PARAMETERS  = 1,
-ERR_MALLOC_FAILED       = 2,
-ERR_API_FAILED          = 3,
+  ERR_NONE                = 0,
+  ERR_INVALID_PARAMETERS  = 1,
+  ERR_MALLOC_FAILED       = 2,
+  ERR_API_FAILED          = 3,
 
-ERR_BOUND               = 31
+  ERR_BOUND               = 31
 } EWelsErr;
 
 /*
@@ -62,13 +62,13 @@
 /* ERR_LEVEL */
 //-----------------------------------------------------------------------------------------------------------
 enum {
-ERR_LEVEL_ACCESS_UNIT = 1,
-ERR_LEVEL_NAL_UNIT_HEADER,
-ERR_LEVEL_PREFIX_NAL,
-ERR_LEVEL_PARAM_SETS,
-ERR_LEVEL_SLICE_HEADER,
-ERR_LEVEL_SLICE_DATA,
-ERR_LEVEL_MB_DATA
+  ERR_LEVEL_ACCESS_UNIT = 1,
+  ERR_LEVEL_NAL_UNIT_HEADER,
+  ERR_LEVEL_PREFIX_NAL,
+  ERR_LEVEL_PARAM_SETS,
+  ERR_LEVEL_SLICE_HEADER,
+  ERR_LEVEL_SLICE_DATA,
+  ERR_LEVEL_MB_DATA
 };
 
 //-----------------------------------------------------------------------------------------------------------
@@ -79,138 +79,139 @@
 #define ERR_INFO_SYNTAX_BASE        1001
 #define ERR_INFO_LOGIC_BASE         10001
 enum {
-/* Error from common system level: 1-1000 */
-ERR_INFO_OUT_OF_MEMORY      = ERR_INFO_COMMON_BASE,
-ERR_INFO_INVALID_ACCESS,
-ERR_INFO_INVALID_PTR,
-ERR_INFO_INVALID_PARAM,
-ERR_INFO_FILE_NO_FOUND,
-ERR_INFO_PATH_NO_FOUND,
-ERR_INFO_ACCESS_DENIED,
-ERR_INFO_NOT_READY,
-ERR_INFO_WRITE_FAULT,
-ERR_INFO_READ_FAULT,
-ERR_INFO_READ_OVERFLOW,
-ERR_INFO_READ_LEADING_ZERO,
-ERR_INFO_UNINIT,
-/* Error from H.264 syntax elements parser: 1001-10000 */
-ERR_INFO_NO_PREFIX_CODE         = ERR_INFO_SYNTAX_BASE, // No start prefix code indication
-ERR_INFO_NO_PARAM_SETS,                                 // No SPS and/ PPS before sequence header
-ERR_INFO_PARAM_SETS_NOT_INTEGRATED,                     // Parameters sets (sps/pps) are not integrated at all before to decode VCL nal
-ERR_INFO_SPS_ID_OVERFLOW,
-ERR_INFO_PPS_ID_OVERFLOW,
-ERR_INFO_INVALID_PROFILE_IDC,
-ERR_INFO_UNMATCHED_LEVEL_IDC,
-ERR_INFO_INVALID_POC_TYPE,
-ERR_INFO_INVALID_MB_SIZE_INFO,
-ERR_INFO_REF_COUNT_OVERFLOW,
-ERR_INFO_CROPPING_NO_SUPPORTED,
-ERR_INFO_INVALID_CROPPING_DATA,
-ERR_INFO_UNSUPPORTED_VUI_HRD,
-ERR_INFO_INVALID_SLICEGROUP,
-ERR_INFO_INVALID_SLICEGROUP_MAP_TYPE,
-ERR_INFO_INVALID_FRAME_NUM,
-ERR_INFO_INVALID_IDR_PIC_ID,
-ERR_INFO_INVALID_REDUNDANT_PIC_CNT,
-ERR_INFO_INVALID_MAX_NUM_REF_FRAMES,
-ERR_INFO_INVALID_MAX_MB_SIZE,
-ERR_INFO_INVALID_FIRST_MB_IN_SLICE,
-ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1,
-ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2,
-ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2,
-ERR_INFO_FMO_INIT_FAIL,
-ERR_INFO_SLICE_TYPE_OVERFLOW,
-ERR_INFO_INVALID_CABAC_INIT_IDC,
-ERR_INFO_INVALID_QP,
-ERR_INFO_INVALID_PIC_INIT_QS,
-ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET,
-ERR_INFO_INVALID_PIC_INIT_QP,
-ERR_INFO_INVALID_LOG2_MAX_FRAME_NUM_MINUS4,
-ERR_INFO_INVALID_LOG2_MAX_PIC_ORDER_CNT_LSB_MINUS4,
-ERR_INFO_INVALID_NUM_REF_FRAME_IN_PIC_ORDER_CNT_CYCLE,
-ERR_INFO_INVALID_DBLOCKING_IDC,
-ERR_INFO_INVALID_MB_TYPE,
-ERR_INFO_INVALID_MB_SKIP_RUN,
-ERR_INFO_INVALID_SPS_ID,
-ERR_INFO_INVALID_PPS_ID,
-ERR_INFO_INVALID_SUB_MB_TYPE,
-ERR_INFO_UNAVAILABLE_TOP_BLOCK_FOR_INTRA,
-ERR_INFO_UNAVAILABLE_LEFT_BLOCK_FOR_INTRA,
-ERR_INFO_INVALID_REF_INDEX,
-ERR_INFO_INVALID_CBP,
-ERR_INFO_DQUANT_OUT_OF_RANGE,
-ERR_INFO_CAVLC_INVALID_PREFIX,
-ERR_INFO_CAVLC_INVALID_LEVEL,
-ERR_INFO_CAVLC_INVALID_TOTAL_COEFF_OR_TRAILING_ONES,
-ERR_INFO_CAVLC_INVALID_ZERO_LEFT,
-ERR_INFO_CAVLC_INVALID_RUN_BEFORE,
-ERR_INFO_MV_OUT_OF_RANGE,
+  /* Error from common system level: 1-1000 */
+  ERR_INFO_OUT_OF_MEMORY      = ERR_INFO_COMMON_BASE,
+  ERR_INFO_INVALID_ACCESS,
+  ERR_INFO_INVALID_PTR,
+  ERR_INFO_INVALID_PARAM,
+  ERR_INFO_FILE_NO_FOUND,
+  ERR_INFO_PATH_NO_FOUND,
+  ERR_INFO_ACCESS_DENIED,
+  ERR_INFO_NOT_READY,
+  ERR_INFO_WRITE_FAULT,
+  ERR_INFO_READ_FAULT,
+  ERR_INFO_READ_OVERFLOW,
+  ERR_INFO_READ_LEADING_ZERO,
+  ERR_INFO_UNINIT,
+  /* Error from H.264 syntax elements parser: 1001-10000 */
+  ERR_INFO_NO_PREFIX_CODE         = ERR_INFO_SYNTAX_BASE, // No start prefix code indication
+  ERR_INFO_NO_PARAM_SETS,                                 // No SPS and/or PPS before sequence header
+  ERR_INFO_PARAM_SETS_NOT_INTEGRATED,                     // Parameter sets (sps/pps) are not integrated at all before decoding VCL nal
+  ERR_INFO_SPS_ID_OVERFLOW,
+  ERR_INFO_PPS_ID_OVERFLOW,
+  ERR_INFO_INVALID_PROFILE_IDC,
+  ERR_INFO_UNMATCHED_LEVEL_IDC,
+  ERR_INFO_INVALID_POC_TYPE,
+  ERR_INFO_INVALID_MB_SIZE_INFO,
+  ERR_INFO_REF_COUNT_OVERFLOW,
+  ERR_INFO_CROPPING_NO_SUPPORTED,
+  ERR_INFO_INVALID_CROPPING_DATA,
+  ERR_INFO_UNSUPPORTED_VUI_HRD,
+  ERR_INFO_INVALID_SLICEGROUP,
+  ERR_INFO_INVALID_SLICEGROUP_MAP_TYPE,
+  ERR_INFO_INVALID_FRAME_NUM,
+  ERR_INFO_INVALID_IDR_PIC_ID,
+  ERR_INFO_INVALID_REDUNDANT_PIC_CNT,
+  ERR_INFO_INVALID_MAX_NUM_REF_FRAMES,
+  ERR_INFO_INVALID_MAX_MB_SIZE,
+  ERR_INFO_INVALID_FIRST_MB_IN_SLICE,
+  ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1,
+  ERR_INFO_INVALID_NUM_REF_IDX_L1_ACTIVE_MINUS1,
+  ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2,
+  ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2,
+  ERR_INFO_FMO_INIT_FAIL,
+  ERR_INFO_SLICE_TYPE_OVERFLOW,
+  ERR_INFO_INVALID_CABAC_INIT_IDC,
+  ERR_INFO_INVALID_QP,
+  ERR_INFO_INVALID_PIC_INIT_QS,
+  ERR_INFO_INVALID_CHROMA_QP_INDEX_OFFSET,
+  ERR_INFO_INVALID_PIC_INIT_QP,
+  ERR_INFO_INVALID_LOG2_MAX_FRAME_NUM_MINUS4,
+  ERR_INFO_INVALID_LOG2_MAX_PIC_ORDER_CNT_LSB_MINUS4,
+  ERR_INFO_INVALID_NUM_REF_FRAME_IN_PIC_ORDER_CNT_CYCLE,
+  ERR_INFO_INVALID_DBLOCKING_IDC,
+  ERR_INFO_INVALID_MB_TYPE,
+  ERR_INFO_INVALID_MB_SKIP_RUN,
+  ERR_INFO_INVALID_SPS_ID,
+  ERR_INFO_INVALID_PPS_ID,
+  ERR_INFO_INVALID_SUB_MB_TYPE,
+  ERR_INFO_UNAVAILABLE_TOP_BLOCK_FOR_INTRA,
+  ERR_INFO_UNAVAILABLE_LEFT_BLOCK_FOR_INTRA,
+  ERR_INFO_INVALID_REF_INDEX,
+  ERR_INFO_INVALID_CBP,
+  ERR_INFO_DQUANT_OUT_OF_RANGE,
+  ERR_INFO_CAVLC_INVALID_PREFIX,
+  ERR_INFO_CAVLC_INVALID_LEVEL,
+  ERR_INFO_CAVLC_INVALID_TOTAL_COEFF_OR_TRAILING_ONES,
+  ERR_INFO_CAVLC_INVALID_ZERO_LEFT,
+  ERR_INFO_CAVLC_INVALID_RUN_BEFORE,
+  ERR_INFO_MV_OUT_OF_RANGE,
 
-ERR_INFO_INVALID_I4x4_PRED_MODE,
-ERR_INFO_INVALID_I16x16_PRED_MODE,
-ERR_INFO_INVALID_I_CHROMA_PRED_MODE,
+  ERR_INFO_INVALID_I4x4_PRED_MODE,
+  ERR_INFO_INVALID_I16x16_PRED_MODE,
+  ERR_INFO_INVALID_I_CHROMA_PRED_MODE,
 
-ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM,
-ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM,
-ERR_INFO_INVALID_LUMA_WEIGHT,
-ERR_INFO_INVALID_CHROMA_WEIGHT,
-ERR_INFO_INVALID_LUMA_OFFSET,
-ERR_INFO_INVALID_CHROMA_OFFSET,
+  ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM,
+  ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM,
+  ERR_INFO_INVALID_LUMA_WEIGHT,
+  ERR_INFO_INVALID_CHROMA_WEIGHT,
+  ERR_INFO_INVALID_LUMA_OFFSET,
+  ERR_INFO_INVALID_CHROMA_OFFSET,
 
-ERR_INFO_UNSUPPORTED_NON_BASELINE,
-ERR_INFO_UNSUPPORTED_FMOTYPE,
-ERR_INFO_UNSUPPORTED_MBAFF,
-ERR_INFO_UNSUPPORTED_ILP,
-ERR_INFO_UNSUPPORTED_CABAC_EL,
-ERR_INFO_UNSUPPORTED_SPSI,
-ERR_INFO_UNSUPPORTED_MGS,
-ERR_INFO_UNSUPPORTED_BIPRED,
-ERR_INFO_UNSUPPORTED_WP,
-ERR_INFO_UNSUPPORTED_SLICESKIP,
+  ERR_INFO_UNSUPPORTED_NON_BASELINE,
+  ERR_INFO_UNSUPPORTED_FMOTYPE,
+  ERR_INFO_UNSUPPORTED_MBAFF,
+  ERR_INFO_UNSUPPORTED_ILP,
+  ERR_INFO_UNSUPPORTED_CABAC_EL,
+  ERR_INFO_UNSUPPORTED_SPSI,
+  ERR_INFO_UNSUPPORTED_MGS,
+  ERR_INFO_UNSUPPORTED_BIPRED,
+  ERR_INFO_UNSUPPORTED_WP,
+  ERR_INFO_UNSUPPORTED_SLICESKIP,
 
-ERR_INFO_FRAMES_LOST,
-ERR_INFO_DEPENDENCY_SPATIAL_LAYER_LOST,
-ERR_INFO_DEPENDENCY_QUALIT_LAYER_LOST,
-ERR_INFO_REFERENCE_PIC_LOST,
-ERR_INFO_INVALID_REORDERING,
-ERR_INFO_INVALID_MARKING,
+  ERR_INFO_FRAMES_LOST,
+  ERR_INFO_DEPENDENCY_SPATIAL_LAYER_LOST,
+  ERR_INFO_DEPENDENCY_QUALIT_LAYER_LOST,
+  ERR_INFO_REFERENCE_PIC_LOST,
+  ERR_INFO_INVALID_REORDERING,
+  ERR_INFO_INVALID_MARKING,
 
-ERR_INFO_FMO_NOT_SUPPORTED_IN_BASE_LAYER,
-ERR_INFO_INVALID_ESS,
-ERR_INFO_INVALID_SLICE_TYPE,
-ERR_INFO_INVALID_REF_MARKING,
-ERR_INFO_INVALID_REF_REORDERING,
+  ERR_INFO_FMO_NOT_SUPPORTED_IN_BASE_LAYER,
+  ERR_INFO_INVALID_ESS,
+  ERR_INFO_INVALID_SLICE_TYPE,
+  ERR_INFO_INVALID_REF_MARKING,
+  ERR_INFO_INVALID_REF_REORDERING,
 
-/* Error from corresponding logic, 10001-65535 */
-ERR_INFO_NO_IDR_PIC             = ERR_INFO_LOGIC_BASE,  // NO IDR picture available before sequence header
-ERR_INFO_EC_NO_NEIGHBOUR_MBS,
-ERR_INFO_EC_UNEXPECTED_MB_TYPE,
-ERR_INFO_EC_NO_ENOUGH_NEIGHBOUR_MBS,
-ERR_INFO_DUPLICATE_FRAME_NUM,
+  /* Error from corresponding logic, 10001-65535 */
+  ERR_INFO_NO_IDR_PIC             = ERR_INFO_LOGIC_BASE,  // NO IDR picture available before sequence header
+  ERR_INFO_EC_NO_NEIGHBOUR_MBS,
+  ERR_INFO_EC_UNEXPECTED_MB_TYPE,
+  ERR_INFO_EC_NO_ENOUGH_NEIGHBOUR_MBS,
+  ERR_INFO_DUPLICATE_FRAME_NUM,
 //for LTR
-ERR_INFO_INVALID_MMCO_NUM,
-ERR_INFO_INVALID_MMCO_OPCODE_BASE,
-ERR_INFO_INVALID_MMCO_SHORT2UNUSED,
-EER_INFO_INVALID_MMCO_LONG2UNUSED,
-ERR_INFO_INVALID_MMCO_SHOART2LONG,
-ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW,
-ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH,
-ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX,
+  ERR_INFO_INVALID_MMCO_NUM,
+  ERR_INFO_INVALID_MMCO_OPCODE_BASE,
+  ERR_INFO_INVALID_MMCO_SHORT2UNUSED,
+  EER_INFO_INVALID_MMCO_LONG2UNUSED,
+  ERR_INFO_INVALID_MMCO_SHOART2LONG,
+  ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW,
+  ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH,
+  ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX,
 //for CABAC
-ERR_CABAC_NO_BS_TO_READ,
-ERR_CABAC_UNEXPECTED_VALUE,
+  ERR_CABAC_NO_BS_TO_READ,
+  ERR_CABAC_UNEXPECTED_VALUE,
 //for scaling list
-ERR_SCALING_LIST_DELTA_SCALE,
+  ERR_SCALING_LIST_DELTA_SCALE,
 //logic error related to multi-layer
-ERR_INFO_WIDTH_MISMATCH,
+  ERR_INFO_WIDTH_MISMATCH,
 //reconstruction error
-ERR_INFO_MB_RECON_FAIL,
-ERR_INFO_MB_NUM_EXCEED_FAIL,
-ERR_INFO_BS_INCOMPLETE,
-ERR_INFO_MB_NUM_INADEQUATE,
+  ERR_INFO_MB_RECON_FAIL,
+  ERR_INFO_MB_NUM_EXCEED_FAIL,
+  ERR_INFO_BS_INCOMPLETE,
+  ERR_INFO_MB_NUM_INADEQUATE,
 //parse only error
-ERR_INFO_PARSEONLY_PENDING,
-ERR_INFO_PARSEONLY_ERROR,
+  ERR_INFO_PARSEONLY_PENDING,
+  ERR_INFO_PARSEONLY_ERROR,
 };
 //-----------------------------------------------------------------------------------------------------------
 
--- a/codec/decoder/core/inc/error_concealment.h
+++ b/codec/decoder/core/inc/error_concealment.h
@@ -50,7 +50,8 @@
 //Do error concealment using slice copy method
 void DoErrorConSliceCopy (PWelsDecoderContext pCtx);
 //Do error concealment using slice MV copy method
-void DoMbECMvCopy (PWelsDecoderContext pCtx, PPicture pDec, PPicture pRef, int32_t iMbXy, int32_t iMbX, int32_t iMbY, sMCRefMember* pMCRefMem, int32_t iCurrPoc);
+void DoMbECMvCopy (PWelsDecoderContext pCtx, PPicture pDec, PPicture pRef, int32_t iMbXy, int32_t iMbX, int32_t iMbY,
+                   sMCRefMember* pMCRefMem, int32_t iCurrPoc);
 void GetAvilInfoFromCorrectMb (PWelsDecoderContext pCtx);
 void DoErrorConSliceMVCopy (PWelsDecoderContext pCtx);
 //Mark erroneous frame as Ref Pic into DPB
--- a/codec/decoder/core/inc/fmo.h
+++ b/codec/decoder/core/inc/fmo.h
@@ -55,12 +55,12 @@
  * \brief   Wels Flexible Macroblock Ordering (FMO)
  */
 typedef struct TagFmo {
-uint8_t*        pMbAllocMap;
-int32_t         iCountMbNum;
-int32_t         iSliceGroupCount;
-int32_t         iSliceGroupType;
-bool            bActiveFlag;
-uint8_t         uiReserved[3];          // reserved padding bytes
+  uint8_t*        pMbAllocMap;
+  int32_t         iCountMbNum;
+  int32_t         iSliceGroupCount;
+  int32_t         iSliceGroupType;
+  bool            bActiveFlag;
+  uint8_t         uiReserved[3];          // reserved padding bytes
 } SFmo, *PFmo;
 
 
--- a/codec/decoder/core/inc/manage_dec_ref.h
+++ b/codec/decoder/core/inc/manage_dec_ref.h
@@ -49,7 +49,9 @@
 
 void  WelsResetRefPic (PWelsDecoderContext pCtx);
 int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc);
+int32_t WelsInitBSliceRefList (PWelsDecoderContext pCtx, int32_t iPoc);
 int32_t WelsReorderRefList (PWelsDecoderContext pCtx);
+int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx);
 int32_t WelsMarkAsRef (PWelsDecoderContext pCtx);
 
 } // namespace WelsDec
--- a/codec/decoder/core/inc/mv_pred.h
+++ b/codec/decoder/core/inc/mv_pred.h
@@ -42,6 +42,7 @@
 #define WELS_MV_PRED_H__
 
 #include "dec_frame.h"
+#include "decoder_context.h"
 
 namespace WelsDec {
 
@@ -50,9 +51,23 @@
 * \param
 * \param
 */
-void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int8_t iRef, int16_t iMVs[2]);
+void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef, int16_t iMVs[2]);
 
 /*!
+* \brief     update ref_index cache for current MB, only for P_16x16 (SKIP inclusive)
+* \param
+* \param
+*/
+void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef);
+
+/*!
+* \brief     update mv only cache for current MB, only for P_16x16 (SKIP inclusive)
+* \param
+* \param
+*/
+void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs[2]);
+
+/*!
 * \brief   update mv and ref_index cache for current MB, only for P_16x8
 * \param
 * \param
@@ -59,7 +74,7 @@
 */
 void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
                             int8_t iRefIndex[LIST_A][30],
-                            int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]);
+                            int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]);
 
 
 /*!
@@ -69,7 +84,7 @@
  */
 void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
                             int8_t iRefIndex[LIST_A][30],
-                            int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]);
+                            int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]);
 
 /*!
  * \brief   get the motion predictor for skip mode
@@ -79,12 +94,37 @@
 void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]);
 
 /*!
+* \brief   get the motion predictor and reference for B-slice direct mode version 2
+* \param
+* \param   output iMvp[] and ref
+*/
+SubMbType  PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]);
+
+/*!
+* \brief   get Colocated MB for both Spatial and Temporal Direct Mode
+* \param
+* \param   output MbType and SubMbType
+*/
+int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType);
+
+/*!
+* \brief   get the motion predictor for B-slice temporal direct mode 16x16
+*/
+void PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]);
+
+/*!
+* \brief   get the motion params for B-slice spatial direct mode
+* \param
+* \param   output iMvp[]
+*/
+
+/*!
  * \brief   get the motion predictor for 4*4 or 8*8 or 16*16 block
  * \param
  * \param   output iMvp[]
  */
 void PredMv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
-             int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]);
+             int32_t listIdx, int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]);
 
 /*!
  * \brief   get the motion predictor for inter16x8 MB
@@ -92,7 +132,7 @@
  * \param   output mvp_x and mvp_y
  */
 void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
-                      int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]);
+                      int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]);
 
 /*!
  * \brief   get the motion predictor for inter8x16 MB
@@ -100,7 +140,7 @@
  * \param   output mvp_x and mvp_y
  */
 void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
-                      int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]);
+                      int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]);
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/nal_prefix.h
+++ b/codec/decoder/core/inc/nal_prefix.h
@@ -44,11 +44,11 @@
 
 /* Prefix NAL Unix syntax, refer to Page 392 in JVT X201wcm */
 typedef struct TagPrefixNalUnit {
-SRefBasePicMarking sRefPicBaseMarking;
-bool               bStoreRefBasePicFlag;
-bool               bPrefixNalUnitAdditionalExtFlag;
-bool               bPrefixNalUnitExtFlag;
-bool               bPrefixNalCorrectFlag;
+  SRefBasePicMarking sRefPicBaseMarking;
+  bool               bStoreRefBasePicFlag;
+  bool               bPrefixNalUnitAdditionalExtFlag;
+  bool               bPrefixNalUnitExtFlag;
+  bool               bPrefixNalCorrectFlag;
 } SPrefixNalUnit, *PPrefixNalUnit;
 
 } // namespace WelsDec
--- a/codec/decoder/core/inc/nalu.h
+++ b/codec/decoder/core/inc/nalu.h
@@ -45,19 +45,19 @@
 
 /* NAL Unit Structure */
 typedef struct TagNalUnit {
-SNalUnitHeaderExt       sNalHeaderExt;
+  SNalUnitHeaderExt       sNalHeaderExt;
 
-union {
-  struct SVclNal {
-    SSliceHeaderExt     sSliceHeaderExt;
-    SBitStringAux       sSliceBitsRead;
-    uint8_t*            pNalPos;         // save the address of slice nal for GPU function
-    int32_t             iNalLength;   // save the nal length for GPU function
-    bool                bSliceHeaderExtFlag;
-  } sVclNal;
-  SPrefixNalUnit        sPrefixNal;
-} sNalData;
-unsigned long long uiTimeStamp;
+  union {
+    struct SVclNal {
+      SSliceHeaderExt     sSliceHeaderExt;
+      SBitStringAux       sSliceBitsRead;
+      uint8_t*            pNalPos;         // save the address of slice nal for GPU function
+      int32_t             iNalLength;   // save the nal length for GPU function
+      bool                bSliceHeaderExtFlag;
+    } sVclNal;
+    SPrefixNalUnit        sPrefixNal;
+  } sNalData;
+  unsigned long long uiTimeStamp;
 } SNalUnit, *PNalUnit;
 
 ///////////////////////////////////ACCESS Unit level///////////////////////////////////
@@ -64,14 +64,14 @@
 
 /* Access Unit structure */
 typedef struct TagAccessUnits {
-PNalUnit*               pNalUnitsList;  // list of NAL Units pointer in this AU
-uint32_t                uiAvailUnitsNum;        // Number of NAL Units available in each AU list based current bitstream,
-uint32_t                uiActualUnitsNum;       // actual number of NAL units belong to current au
+  PNalUnit*               pNalUnitsList;  // list of NAL Units pointer in this AU
+  uint32_t                uiAvailUnitsNum;   // Number of NAL Units available in each AU list based on current bitstream,
+  uint32_t                uiActualUnitsNum;       // actual number of NAL units belong to current au
 // While available number exceeds count size below, need realloc extra NAL Units for list space.
-uint32_t                uiCountUnitsNum;        // Count size number of malloced NAL Units in each AU list
-uint32_t                uiStartPos;
-uint32_t                uiEndPos;
-bool                    bCompletedAuFlag;       // Indicate whether it is a completed AU
+  uint32_t                uiCountUnitsNum;        // Count size number of malloced NAL Units in each AU list
+  uint32_t                uiStartPos;
+  uint32_t                uiEndPos;
+  bool                    bCompletedAuFlag;       // Indicate whether it is a completed AU
 } SAccessUnit, *PAccessUnit;
 
 } // namespace WelsDec
--- a/codec/decoder/core/inc/parameter_sets.h
+++ b/codec/decoder/core/inc/parameter_sets.h
@@ -38,40 +38,40 @@
 #include "wels_common_basis.h"
 
 namespace WelsDec {
-  /* VUI syntax in Sequence Parameter Set, refer to E.1 in Rec */
-  typedef struct TagVui {
-    bool bAspectRatioInfoPresentFlag;
-    uint32_t uiAspectRatioIdc;
-    uint32_t uiSarWidth;
-    uint32_t uiSarHeight;
-    bool bOverscanInfoPresentFlag;
-    bool bOverscanAppropriateFlag;
-    bool bVideoSignalTypePresentFlag;
-    uint8_t uiVideoFormat;
-    bool bVideoFullRangeFlag;
-    bool bColourDescripPresentFlag;
-    uint8_t uiColourPrimaries;
-    uint8_t uiTransferCharacteristics;
-    uint8_t uiMatrixCoeffs;
-    bool bChromaLocInfoPresentFlag;
-    uint32_t uiChromaSampleLocTypeTopField;
-    uint32_t uiChromaSampleLocTypeBottomField;
-    bool bTimingInfoPresentFlag;
-    uint32_t uiNumUnitsInTick;
-    uint32_t uiTimeScale;
-    bool bFixedFrameRateFlag;
-    bool bNalHrdParamPresentFlag;
-    bool bVclHrdParamPresentFlag;
-    bool bPicStructPresentFlag;
-    bool bBitstreamRestrictionFlag;
-    bool bMotionVectorsOverPicBoundariesFlag;
-    uint32_t uiMaxBytesPerPicDenom;
-    uint32_t uiMaxBitsPerMbDenom;
-    uint32_t uiLog2MaxMvLengthHorizontal;
-    uint32_t uiLog2MaxMvLengthVertical;
-    uint32_t uiMaxNumReorderFrames;
-    uint32_t uiMaxDecFrameBuffering;
-  } SVui, *PVui;
+/* VUI syntax in Sequence Parameter Set, refer to E.1 in Rec */
+typedef struct TagVui {
+  bool bAspectRatioInfoPresentFlag;
+  uint32_t uiAspectRatioIdc;
+  uint32_t uiSarWidth;
+  uint32_t uiSarHeight;
+  bool bOverscanInfoPresentFlag;
+  bool bOverscanAppropriateFlag;
+  bool bVideoSignalTypePresentFlag;
+  uint8_t uiVideoFormat;
+  bool bVideoFullRangeFlag;
+  bool bColourDescripPresentFlag;
+  uint8_t uiColourPrimaries;
+  uint8_t uiTransferCharacteristics;
+  uint8_t uiMatrixCoeffs;
+  bool bChromaLocInfoPresentFlag;
+  uint32_t uiChromaSampleLocTypeTopField;
+  uint32_t uiChromaSampleLocTypeBottomField;
+  bool bTimingInfoPresentFlag;
+  uint32_t uiNumUnitsInTick;
+  uint32_t uiTimeScale;
+  bool bFixedFrameRateFlag;
+  bool bNalHrdParamPresentFlag;
+  bool bVclHrdParamPresentFlag;
+  bool bPicStructPresentFlag;
+  bool bBitstreamRestrictionFlag;
+  bool bMotionVectorsOverPicBoundariesFlag;
+  uint32_t uiMaxBytesPerPicDenom;
+  uint32_t uiMaxBitsPerMbDenom;
+  uint32_t uiLog2MaxMvLengthHorizontal;
+  uint32_t uiLog2MaxMvLengthVertical;
+  uint32_t uiMaxNumReorderFrames;
+  uint32_t uiMaxDecFrameBuffering;
+} SVui, *PVui;
 
 /* Sequence Parameter Set, refer to Page 57 in JVT X201wcm */
 typedef struct TagSps {
@@ -125,7 +125,7 @@
   uint8_t  iScalingList4x4[6][16];
   uint8_t  iScalingList8x8[6][64];
   SVui sVui;
-const SLevelLimits* pSLevelLimits;
+  const SLevelLimits* pSLevelLimits;
 } SSps, *PSps;
 
 
--- a/codec/decoder/core/inc/parse_mb_syn_cabac.h
+++ b/codec/decoder/core/inc/parse_mb_syn_cabac.h
@@ -46,14 +46,20 @@
 int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip);
 int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
 int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
-int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, bool& bTransformSize8x8Flag);
+int32_t ParseMBTypeBSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal);
+int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail,
+                                        bool& bTransformSize8x8Flag);
 int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType);
+int32_t ParseBSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType);
 int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal);
 int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal);
-int32_t ParseInterMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
-                                   int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]);
+int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+                                    int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]);
+int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+                                    int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30],
+                                    int8_t pDirect[30]);
 int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* nzc,
-                          int8_t ref_idx[LIST_A][30],
+                          int8_t ref_idx[LIST_A][30], int8_t direct[30],
                           int32_t iListIdx, int32_t index, int32_t iActiveRefNum, int32_t b8mode, int8_t& iRefIdxVal);
 int32_t ParseMvdInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t pRefIndex[LIST_A][30],
                            int16_t pMvdCache[LIST_A][30][2], int32_t index, int8_t iListIdx, int8_t iMvComp, int16_t& iMvdVal);
@@ -68,9 +74,14 @@
                                  int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
                                  PWelsDecoderContext pCtx);
 int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
-                                 int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
-                                 PWelsDecoderContext pCtx);
+                                    int32_t index, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty, int16_t* sTCoeff, uint8_t uiQp,
+                                    PWelsDecoderContext pCtx);
 int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx);
+void    UpdateP16x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvd[2], const int8_t iListIdx);
+void    UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
+                               const int8_t iListIdx);
+void    UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx);
+void    UpdateP16x16DirectCabac (PDqLayer pCurDqLayer);
 }
 //#pragma pack()
 #endif
--- a/codec/decoder/core/inc/parse_mb_syn_cavlc.h
+++ b/codec/decoder/core/inc/parse_mb_syn_cavlc.h
@@ -58,7 +58,9 @@
 void WelsFillCacheConstrain1IntraNxN (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, int8_t* pIntraPredMode,
                                       PDqLayer pCurLayer);
 void WelsFillCacheInterCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
-                         int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
+                              int16_t iMvArray[LIST_A][30][MV_A], int16_t iMvdCache[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30],
+                              PDqLayer pCurLayer);
+void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer);
 void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
                          int16_t iMvArray[LIST_A][30][MV_A], int8_t iRefIdxArray[LIST_A][30], PDqLayer pCurLayer);
 
@@ -109,18 +111,18 @@
 
 // Transform8x8
 int32_t WelsResidualBlockCavlc8x8 (SVlcTable* pVlcTable,
-                                uint8_t* pNonZeroCountCache,
-                                PBitStringAux pBs,
-                                /*int16_t* coeff_level,*/
-                                int32_t iIndex,
-                                int32_t iMaxNumCoeff,
-                                const uint8_t* kpZigzagTable,
-                                int32_t iResidualProperty,
-                                /*short *tCoeffLevel,*/
-                                int16_t* pTCoeff,
-                                int32_t  iIdx4x4,
-                                uint8_t uiQp,
-                                PWelsDecoderContext pCtx);
+                                   uint8_t* pNonZeroCountCache,
+                                   PBitStringAux pBs,
+                                   /*int16_t* coeff_level,*/
+                                   int32_t iIndex,
+                                   int32_t iMaxNumCoeff,
+                                   const uint8_t* kpZigzagTable,
+                                   int32_t iResidualProperty,
+                                   /*short *tCoeffLevel,*/
+                                   int16_t* pTCoeff,
+                                   int32_t  iIdx4x4,
+                                   uint8_t uiQp,
+                                   PWelsDecoderContext pCtx);
 
 /*!
  * \brief   parsing inter info (including ref_index and pMvd)
--- a/codec/decoder/core/inc/pic_queue.h
+++ b/codec/decoder/core/inc/pic_queue.h
@@ -43,9 +43,9 @@
 
 
 typedef struct TagPicBuff {
-PPicture*      ppPic;
-int32_t        iCapacity;  // capacity size of queue
-int32_t        iCurrentIdx;
+  PPicture*      ppPic;
+  int32_t        iCapacity;  // capacity size of queue
+  int32_t        iCurrentIdx;
 } SPicBuff, *PPicBuff;
 
 /*
--- a/codec/decoder/core/inc/picture.h
+++ b/codec/decoder/core/inc/picture.h
@@ -35,54 +35,69 @@
 #define WELS_PICTURE_H__
 
 #include "typedefs.h"
+#include "wels_common_defs.h"
+#include "wels_const_common.h"
 
+using namespace WelsCommon;
+
 namespace WelsDec {
 
 /*
- *  Reconstructed Picture definition
- *  It is used to express reference picture, also consequent reconstruction picture for output
- */
-typedef struct TagPicture {
-/************************************payload data*********************************/
-uint8_t*        pBuffer[4];             // pointer to the first allocated byte, basical offset of buffer, dimension:
-uint8_t*        pData[4];               // pointer to picture planes respectively
-int32_t         iLinesize[4];// linesize of picture planes respectively used currently
-int32_t         iPlanes;                        // How many planes are introduced due to color space format?
+*  Reconstructed Picture definition
+*  It is used to express reference picture, also consequent reconstruction picture for output
+*/
+
+struct SPicture {
+  /************************************payload data*********************************/
+  uint8_t*        pBuffer[4];             // pointer to the first allocated byte, basical offset of buffer, dimension:
+  uint8_t*        pData[4];               // pointer to picture planes respectively
+  int32_t         iLinesize[4];// linesize of picture planes respectively used currently
+  int32_t         iPlanes;                        // How many planes are introduced due to color space format?
 // picture information
 
-/*******************************from EC mv copy****************************/
-bool bIdrFlag;
+  /*******************************from EC mv copy****************************/
+  bool bIdrFlag;
 
-/*******************************from other standard syntax****************************/
-/*from sps*/
-int32_t         iWidthInPixel;  // picture width in pixel
-int32_t         iHeightInPixel;// picture height in pixel
-/*from slice header*/
-int32_t         iFramePoc;              // frame POC
+  /*******************************from other standard syntax****************************/
+  /*from sps*/
+  int32_t         iWidthInPixel;  // picture width in pixel
+  int32_t         iHeightInPixel;// picture height in pixel
+  /*from slice header*/
+  int32_t         iFramePoc;              // frame POC
 
-/*******************************sef_definition for misc use****************************/
-bool            bUsedAsRef;                                                     //for ref pic management
-bool            bIsLongRef;     // long term reference frame flag       //for ref pic management
-uint8_t         uiRefCount;
-bool            bAvailableFlag; // indicate whether it is available in this picture memory block.
+  /*******************************sef_definition for misc use****************************/
+  bool            bUsedAsRef;                                                     //for ref pic management
+  bool            bIsLongRef;     // long term reference frame flag       //for ref pic management
+  uint8_t         uiRefCount;
+  bool            bAvailableFlag; // indicate whether it is available in this picture memory block.
 
-bool            bIsComplete;    // indicate whether current picture is complete, not from EC
-/*******************************for future use****************************/
-uint8_t         uiTemporalId;
-uint8_t         uiSpatialId;
-uint8_t         uiQualityId;
+  bool            bIsComplete;    // indicate whether current picture is complete, not from EC
+  /*******************************for future use****************************/
+  uint8_t         uiTemporalId;
+  uint8_t         uiSpatialId;
+  uint8_t         uiQualityId;
 
-int32_t         iFrameNum;              // frame number                 //for ref pic management
-int32_t         iLongTermFrameIdx;                                      //id for long term ref pic
+  int32_t         iFrameNum;              // frame number                 //for ref pic management
+  int32_t         iFrameWrapNum;          // frame wrap number            //for ref pic management
+  int32_t         iLongTermFrameIdx;                                      //id for long term ref pic
+  uint32_t        uiLongTermPicNum;       //long_term_pic_num
 
-int32_t     iSpsId; //against mosaic caused by cross-IDR interval reference.
-int32_t     iPpsId;
-unsigned long long uiTimeStamp;
-bool bNewSeqBegin;
-int32_t iMbEcedNum;
-int32_t iMbEcedPropNum;
-int32_t iMbNum;
-} SPicture, *PPicture; // "Picture" declaration is comflict with Mac system
+  int32_t     iSpsId; //against mosaic caused by cross-IDR interval reference.
+  int32_t     iPpsId;
+  unsigned long long uiTimeStamp;
+  bool bNewSeqBegin;
+  int32_t iMbEcedNum;
+  int32_t iMbEcedPropNum;
+  int32_t iMbNum;
+
+  uint32_t*  pMbType; // mb type used for direct mode
+  int16_t (*pMv[LIST_A])[MB_BLOCK4x4_NUM][MV_A]; // used for direct mode
+  int8_t (*pRefIndex[LIST_A])[MB_BLOCK4x4_NUM]; //used for direct mode
+  struct SPicture* pRefPic[LIST_A][17];  //ref pictures used for direct mode
+
+};// "Picture" declaration conflicts with Mac system
+
+typedef struct SPicture* PPicture;
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/rec_mb.h
+++ b/codec/decoder/core/inc/rec_mb.h
@@ -68,7 +68,7 @@
 } sMCRefMember;
 
 void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
-                           int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]);
+             int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]);
 
 void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer);
 
@@ -87,6 +87,8 @@
 int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer);
 
 void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx);
+
+void GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx);
 
 } // namespace WelsDec
 
--- a/codec/decoder/core/inc/slice.h
+++ b/codec/decoder/core/inc/slice.h
@@ -46,12 +46,12 @@
  *  Reference picture list reordering syntax, refer to page 64 in JVT X201wcm
  */
 typedef struct TagRefPicListReorderSyntax {
-struct {
-  uint32_t    uiAbsDiffPicNumMinus1;
-  uint16_t    uiLongTermPicNum;
-  uint16_t    uiReorderingOfPicNumsIdc;
-} sReorderingSyn[LIST_A][MAX_REF_PIC_COUNT];
-bool          bRefPicListReorderingFlag[LIST_A];
+  struct {
+    uint32_t    uiAbsDiffPicNumMinus1;
+    uint16_t    uiLongTermPicNum;
+    uint16_t    uiReorderingOfPicNumsIdc;
+  } sReorderingSyn[LIST_A][MAX_REF_PIC_COUNT];
+  bool          bRefPicListReorderingFlag[LIST_A];
 } SRefPicListReorderSyn, *PRefPicListReorderSyn;
 
 /*
@@ -58,146 +58,149 @@
  *  Prediction weight table syntax, refer to page 65 in JVT X201wcm
  */
 typedef struct TagPredWeightTabSyntax {
-uint32_t  uiLumaLog2WeightDenom;
-uint32_t  uiChromaLog2WeightDenom;
-struct {
-  int32_t iLumaWeight[MAX_REF_PIC_COUNT];
-  int32_t iLumaOffset[MAX_REF_PIC_COUNT];
-  int32_t iChromaWeight[MAX_REF_PIC_COUNT][2];
-  int32_t iChromaOffset[MAX_REF_PIC_COUNT][2];
-  bool    bLumaWeightFlag;
-  bool    bChromaWeightFlag;
-} sPredList[LIST_A];
-} SPredWeightTabSyn,*PPredWeightTabSyn;
+  uint32_t  uiLumaLog2WeightDenom;
+  uint32_t  uiChromaLog2WeightDenom;
+  struct {
+    int32_t iLumaWeight[MAX_REF_PIC_COUNT];
+    int32_t iLumaOffset[MAX_REF_PIC_COUNT];
+    int32_t iChromaWeight[MAX_REF_PIC_COUNT][2];
+    int32_t iChromaOffset[MAX_REF_PIC_COUNT][2];
+    bool    bLumaWeightFlag;
+    bool    bChromaWeightFlag;
+  } sPredList[LIST_A];
+  int32_t   iImplicitWeight[MAX_REF_PIC_COUNT][MAX_REF_PIC_COUNT];
+} SPredWeightTabSyn, *PPredWeightTabSyn;
 
 /* Decoded reference picture marking syntax, refer to Page 66 in JVT X201wcm */
 typedef struct TagRefPicMarking {
-struct {
-  uint32_t    uiMmcoType;
-  int32_t     iShortFrameNum;
-  int32_t     iDiffOfPicNum;
-  uint32_t    uiLongTermPicNum;
-  int32_t     iLongTermFrameIdx;
-  int32_t     iMaxLongTermFrameIdx;
-} sMmcoRef[MAX_MMCO_COUNT];
+  struct {
+    uint32_t    uiMmcoType;
+    int32_t     iShortFrameNum;
+    int32_t     iDiffOfPicNum;
+    uint32_t    uiLongTermPicNum;
+    int32_t     iLongTermFrameIdx;
+    int32_t     iMaxLongTermFrameIdx;
+  } sMmcoRef[MAX_MMCO_COUNT];
 
-bool          bNoOutputOfPriorPicsFlag;
-bool          bLongTermRefFlag;
-bool          bAdaptiveRefPicMarkingModeFlag;
+  bool          bNoOutputOfPriorPicsFlag;
+  bool          bLongTermRefFlag;
+  bool          bAdaptiveRefPicMarkingModeFlag;
 } SRefPicMarking, *PRefPicMarking;
 
 /* Decode reference base picture marking syntax in Page 396 of JVT X201wcm */
 typedef struct TagRefBasePicMarkingSyn {
-struct {
-  uint32_t      uiMmcoType;
-  int32_t       iShortFrameNum;
-  uint32_t      uiDiffOfPicNums;
-  uint32_t      uiLongTermPicNum; //should uint32_t, cover larger range of iFrameNum.
-} mmco_base[MAX_MMCO_COUNT];    // MAX_REF_PIC for reference picture based on frame
+  struct {
+    uint32_t      uiMmcoType;
+    int32_t       iShortFrameNum;
+    uint32_t      uiDiffOfPicNums;
+    uint32_t      uiLongTermPicNum; //should uint32_t, cover larger range of iFrameNum.
+  } mmco_base[MAX_MMCO_COUNT];    // MAX_REF_PIC for reference picture based on frame
 
-bool            bAdaptiveRefBasePicMarkingModeFlag;
+  bool            bAdaptiveRefBasePicMarkingModeFlag;
 } SRefBasePicMarking, *PRefBasePicMarking;
 
 /* Header of slice syntax elements, refer to Page 63 in JVT X201wcm */
 typedef struct TagSliceHeaders {
-/*****************************slice header syntax and generated****************************/
-int32_t         iFirstMbInSlice;
-int32_t         iFrameNum;
-int32_t         iPicOrderCntLsb;
-int32_t         iDeltaPicOrderCntBottom;
-int32_t         iDeltaPicOrderCnt[2];
-int32_t         iRedundantPicCnt;
-int32_t         uiRefCount[LIST_A];
-int32_t         iSliceQpDelta;  //no use for iSliceQp is used directly
-int32_t         iSliceQp;
-int32_t         iSliceQsDelta;  // For SP/SI slices
-uint32_t        uiDisableDeblockingFilterIdc;
-int32_t         iSliceAlphaC0Offset;
-int32_t         iSliceBetaOffset;
-int32_t         iSliceGroupChangeCycle;
+  /*****************************slice header syntax and generated****************************/
+  int32_t         iFirstMbInSlice;
+  int32_t         iFrameNum;
+  int32_t         iPicOrderCntLsb;
+  int32_t         iDeltaPicOrderCntBottom;
+  int32_t         iDeltaPicOrderCnt[2];
+  int32_t         iRedundantPicCnt;
+  int32_t         iDirectSpatialMvPredFlag; //!< Direct Mode type to be used (0: Temporal, 1: Spatial)
+  int32_t         uiRefCount[LIST_A];
+  int32_t         iSliceQpDelta;  //no use for iSliceQp is used directly
+  int32_t         iSliceQp;
+  int32_t         iSliceQsDelta;  // For SP/SI slices
+  uint32_t        uiDisableDeblockingFilterIdc;
+  int32_t         iSliceAlphaC0Offset;
+  int32_t         iSliceBetaOffset;
+  int32_t         iSliceGroupChangeCycle;
 
-PSps            pSps;
-PPps            pPps;
-int32_t         iSpsId;
-int32_t         iPpsId;
-bool bIdrFlag;
+  PSps            pSps;
+  PPps            pPps;
+  int32_t         iSpsId;
+  int32_t         iPpsId;
+  bool bIdrFlag;
 
-/*********************got from other layer for efficency if possible*********************/
-SRefPicListReorderSyn   pRefPicListReordering;  // Reference picture list reordering syntaxs
-SPredWeightTabSyn       sPredWeightTable;
-int32_t                 iCabacInitIdc;
-int32_t                 iMbWidth;       //from?
-int32_t                 iMbHeight; //from?
-SRefPicMarking          sRefMarking;    // Decoded reference picture marking syntaxs
+  /*********************got from other layer for efficiency if possible*********************/
+  SRefPicListReorderSyn   pRefPicListReordering;  // Reference picture list reordering syntaxs
+  SPredWeightTabSyn       sPredWeightTable;
+  int32_t                 iCabacInitIdc;
+  int32_t                 iMbWidth;       //from?
+  int32_t                 iMbHeight; //from?
+  SRefPicMarking          sRefMarking;    // Decoded reference picture marking syntaxs
 
-uint16_t    uiIdrPicId;
-EWelsSliceType  eSliceType;
-bool            bNumRefIdxActiveOverrideFlag;
-bool            bFieldPicFlag;          //not supported in base profile
-bool            bBottomFiledFlag;               //not supported in base profile
-uint8_t         uiPadding1Byte;
-bool            bSpForSwitchFlag;                       // For SP/SI slices
-int16_t         iPadding2Bytes;
+  uint16_t    uiIdrPicId;
+  EWelsSliceType  eSliceType;
+  bool            bNumRefIdxActiveOverrideFlag;
+  bool            bFieldPicFlag;          //not supported in base profile
+  bool            bBottomFiledFlag;               //not supported in base profile
+  uint8_t         uiPadding1Byte;
+  bool            bSpForSwitchFlag;                       // For SP/SI slices
+  int16_t         iPadding2Bytes;
 } SSliceHeader, *PSliceHeader;
 
 
 /* Slice header in scalable extension syntax, refer to Page 394 in JVT X201wcm */
 typedef struct TagSliceHeaderExt {
-SSliceHeader    sSliceHeader;
-PSubsetSps      pSubsetSps;
+  SSliceHeader    sSliceHeader;
+  PSubsetSps      pSubsetSps;
 
-uint32_t        uiDisableInterLayerDeblockingFilterIdc;
-int32_t         iInterLayerSliceAlphaC0Offset;
-int32_t         iInterLayerSliceBetaOffset;
+  uint32_t        uiDisableInterLayerDeblockingFilterIdc;
+  int32_t         iInterLayerSliceAlphaC0Offset;
+  int32_t         iInterLayerSliceBetaOffset;
 
 //SPosOffset sScaledRefLayer;
-int32_t         iScaledRefLayerPicWidthInSampleLuma;
-int32_t         iScaledRefLayerPicHeightInSampleLuma;
+  int32_t         iScaledRefLayerPicWidthInSampleLuma;
+  int32_t         iScaledRefLayerPicHeightInSampleLuma;
 
-SRefBasePicMarking sRefBasePicMarking;
-bool            bBasePredWeightTableFlag;
-bool            bStoreRefBasePicFlag;
-bool            bConstrainedIntraResamplingFlag;
-bool            bSliceSkipFlag;
+  SRefBasePicMarking sRefBasePicMarking;
+  bool            bBasePredWeightTableFlag;
+  bool            bStoreRefBasePicFlag;
+  bool            bConstrainedIntraResamplingFlag;
+  bool            bSliceSkipFlag;
 
-bool            bAdaptiveBaseModeFlag;
-bool            bDefaultBaseModeFlag;
-bool            bAdaptiveMotionPredFlag;
-bool            bDefaultMotionPredFlag;
-bool            bAdaptiveResidualPredFlag;
-bool            bDefaultResidualPredFlag;
-bool            bTCoeffLevelPredFlag;
-uint8_t         uiRefLayerChromaPhaseXPlus1Flag;
+  bool            bAdaptiveBaseModeFlag;
+  bool            bDefaultBaseModeFlag;
+  bool            bAdaptiveMotionPredFlag;
+  bool            bDefaultMotionPredFlag;
+  bool            bAdaptiveResidualPredFlag;
+  bool            bDefaultResidualPredFlag;
+  bool            bTCoeffLevelPredFlag;
+  uint8_t         uiRefLayerChromaPhaseXPlus1Flag;
 
-uint8_t         uiRefLayerChromaPhaseYPlus1;
-uint8_t         uiRefLayerDqId;
-uint8_t         uiScanIdxStart;
-uint8_t         uiScanIdxEnd;
+  uint8_t         uiRefLayerChromaPhaseYPlus1;
+  uint8_t         uiRefLayerDqId;
+  uint8_t         uiScanIdxStart;
+  uint8_t         uiScanIdxEnd;
 } SSliceHeaderExt, *PSliceHeaderExt;
 
 
 typedef struct TagSlice {
-/*******************************slice_header****************************/
-SSliceHeaderExt sSliceHeaderExt;
+  /*******************************slice_header****************************/
+  SSliceHeaderExt sSliceHeaderExt;
 
-/*******************************use for future****************************/
+  /*******************************use for future****************************/
 // for Macroblock coding within slice
-int32_t         iLastMbQp;              // stored qp for last mb coded, maybe more efficient for mb skip detection etc.
+  int32_t         iLastMbQp;              // stored qp for last mb coded, maybe more efficient for mb skip detection etc.
 
-/*******************************slice_data****************************/
-/*slice_data_ext()*/
-int32_t         iMbSkipRun;
-int32_t         iTotalMbInCurSlice; //record the total number of MB in current slice.
+  /*******************************slice_data****************************/
+  /*slice_data_ext()*/
+  int32_t         iMbSkipRun;
+  int32_t         iTotalMbInCurSlice; //record the total number of MB in current slice.
 
-/*slice_data_ext() generate*/
+  /*slice_data_ext() generate*/
 
-/*******************************misc use****************************/
-bool            bSliceHeaderExtFlag; // Indicate which slice header is used, avc or ext?
-/*************got from other layer for effiency if possible***************/
-/*from lower layer: slice header*/
-uint8_t         eSliceType;
-uint8_t         uiPadding[2];
-int32_t         iLastDeltaQp;
+  /*******************************misc use****************************/
+  bool            bSliceHeaderExtFlag; // Indicate which slice header is used, avc or ext?
+  /*************got from other layer for efficiency if possible***************/
+  /*from lower layer: slice header*/
+  uint8_t         eSliceType;
+  uint8_t         uiPadding[2];
+  int32_t         iLastDeltaQp;
+  int16_t         iMvScale[LIST_A][MAX_DPB_COUNT]; //Motion vector scale for temporal direct mode
 } SSlice, *PSlice;
 
 } // namespace WelsDec
--- a/codec/decoder/core/inc/wels_common_basis.h
+++ b/codec/decoder/core/inc/wels_common_basis.h
@@ -48,7 +48,7 @@
 extern const uint8_t g_kuiLumaDcZigzagScan[16];
 extern const uint8_t g_kuiChromaDcScan[4];
 extern const uint8_t g_kMbNonZeroCountIdx[24];
-extern const uint8_t g_kCacheNzcScanIdx[4*4+4+4+3];
+extern const uint8_t g_kCacheNzcScanIdx[4 * 4 + 4 + 4 + 3];
 extern const uint8_t g_kCache26ScanIdx[16];
 extern const uint8_t g_kCache30ScanIdx[16];
 extern const uint8_t g_kNonZeroScanIdxC[4];
@@ -57,15 +57,15 @@
 
 /* Position Offset structure */
 typedef struct TagPosOffset {
-int32_t iLeftOffset;
-int32_t iTopOffset;
-int32_t iRightOffset;
-int32_t iBottomOffset;
+  int32_t iLeftOffset;
+  int32_t iTopOffset;
+  int32_t iRightOffset;
+  int32_t iBottomOffset;
 } SPosOffset;
 
 /* MB Type & Sub-MB Type */
-typedef int32_t MbType;
-typedef int32_t SubMbType;
+typedef uint32_t MbType;
+typedef uint32_t SubMbType;
 
 #define I16_LUMA_DC  1
 #define I16_LUMA_AC  2
@@ -90,43 +90,43 @@
 #define POP_BUFFER(pBitsCache, iCount)  { pBitsCache->uiCache32Bit <<= iCount;  pBitsCache->uiRemainBits -= iCount; }
 
 static const uint8_t g_kuiZigzagScan[16] = { //4*4block residual zig-zag scan order
-    0,  1,  4,  8,
-    5,  2,  3,  6,
-    9, 12, 13, 10,
-    7, 11, 14, 15,
+  0,  1,  4,  8,
+  5,  2,  3,  6,
+  9, 12, 13, 10,
+  7, 11, 14, 15,
 };
 
 static const uint8_t g_kuiZigzagScan8x8[64] = { //8x8 block residual zig-zag scan order
-    0,  1,  8,  16, 9,  2,  3,  10,
-    17, 24, 32, 25, 18, 11, 4,  5,
-    12, 19, 26, 33, 40, 48, 41, 34,
-    27, 20, 13, 6,  7,  14, 21, 28,
-    35, 42, 49, 56, 57, 50, 43, 36,
-    29, 22, 15, 23, 30, 37, 44, 51,
-    58, 59, 52, 45, 38, 31, 39, 46,
-    53, 60, 61, 54, 47, 55, 62, 63,
+  0,  1,  8,  16, 9,  2,  3,  10,
+  17, 24, 32, 25, 18, 11, 4,  5,
+  12, 19, 26, 33, 40, 48, 41, 34,
+  27, 20, 13, 6,  7,  14, 21, 28,
+  35, 42, 49, 56, 57, 50, 43, 36,
+  29, 22, 15, 23, 30, 37, 44, 51,
+  58, 59, 52, 45, 38, 31, 39, 46,
+  53, 60, 61, 54, 47, 55, 62, 63,
 };
 
 static const uint8_t g_kuiIdx2CtxSignificantCoeffFlag8x8[64] = {  // Table 9-43, Page 289
-    0,  1,  2,  3,  4,  5,  5,  4,
-    4,  3,  3,  4,  4,  4,  5,  5,
-    4,  4,  4,  4,  3,  3,  6,  7,
-    7,  7,  8,  9, 10,  9,  8,  7,
-    7,  6, 11, 12, 13, 11,  6,  7,
-    8,  9, 14, 10,  9,  8,  6, 11,
-    12, 13, 11, 6,  9, 14, 10,  9,
-    11, 12, 13, 11 ,14, 10, 12, 14,
+  0,  1,  2,  3,  4,  5,  5,  4,
+  4,  3,  3,  4,  4,  4,  5,  5,
+  4,  4,  4,  4,  3,  3,  6,  7,
+  7,  7,  8,  9, 10,  9,  8,  7,
+  7,  6, 11, 12, 13, 11,  6,  7,
+  8,  9, 14, 10,  9,  8,  6, 11,
+  12, 13, 11, 6,  9, 14, 10,  9,
+  11, 12, 13, 11, 14, 10, 12, 14,
 };
 
 static const uint8_t g_kuiIdx2CtxLastSignificantCoeffFlag8x8[64] = { // Table 9-43, Page 289
-    0,  1,  1,  1,  1,  1,  1,  1,
-    1,  1,  1,  1,  1,  1,  1,  1,
-    2,  2,  2,  2,  2,  2,  2,  2,
-    2,  2,  2,  2,  2,  2,  2,  2,
-    3,  3,  3,  3,  3,  3,  3,  3,
-    4,  4,  4,  4,  4,  4,  4,  4,
-    5,  5,  5,  5,  6,  6,  6,  6,
-    7,  7,  7,  7,  8,  8,  8,  8,
+  0,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,
+  2,  2,  2,  2,  2,  2,  2,  2,
+  2,  2,  2,  2,  2,  2,  2,  2,
+  3,  3,  3,  3,  3,  3,  3,  3,
+  4,  4,  4,  4,  4,  4,  4,  4,
+  5,  5,  5,  5,  6,  6,  6,  6,
+  7,  7,  7,  7,  8,  8,  8,  8,
 };
 
 static inline void GetMbResProperty (int32_t* pMBproperty, int32_t* pResidualProperty, bool bCavlc) {
@@ -190,43 +190,43 @@
 }
 
 typedef struct TagI16PredInfo {
-    int8_t iPredMode;
-    int8_t iLeftAvail;
-    int8_t iTopAvail;
-    int8_t iLeftTopAvail;
+  int8_t iPredMode;
+  int8_t iLeftAvail;
+  int8_t iTopAvail;
+  int8_t iLeftTopAvail;
 } SI16PredInfo;
 static const SI16PredInfo g_ksI16PredInfo[4] = {
-    {I16_PRED_V, 0, 1, 0},
-    {I16_PRED_H, 1, 0, 0},
-    {         0, 0, 0, 0},
-    {I16_PRED_P, 1, 1, 1},
+  {I16_PRED_V, 0, 1, 0},
+  {I16_PRED_H, 1, 0, 0},
+  {         0, 0, 0, 0},
+  {I16_PRED_P, 1, 1, 1},
 };
 
 static const SI16PredInfo g_ksChromaPredInfo[4] = {
-    {       0, 0, 0, 0},
-    {C_PRED_H, 1, 0, 0},
-    {C_PRED_V, 0, 1, 0},
-    {C_PRED_P, 1, 1, 1},
+  {       0, 0, 0, 0},
+  {C_PRED_H, 1, 0, 0},
+  {C_PRED_V, 0, 1, 0},
+  {C_PRED_P, 1, 1, 1},
 };
 
 
 typedef struct TagI4PredInfo {
-    int8_t iPredMode;
-    int8_t iLeftAvail;
-    int8_t iTopAvail;
-    int8_t iLeftTopAvail;
-    // int8_t right_top_avail; //when right_top unavailable but top avail, we can pad the right-top with the rightmost pixel of top
+  int8_t iPredMode;
+  int8_t iLeftAvail;
+  int8_t iTopAvail;
+  int8_t iLeftTopAvail;
+  // int8_t right_top_avail; //when right_top unavailable but top avail, we can pad the right-top with the rightmost pixel of top
 } SI4PredInfo;
 static const SI4PredInfo g_ksI4PredInfo[9] = {
-    {  I4_PRED_V, 0, 1, 0},
-    {  I4_PRED_H, 1, 0, 0},
-    {          0, 0, 0, 0},
-    {I4_PRED_DDL, 0, 1, 0},
-    {I4_PRED_DDR, 1, 1, 1},
-    { I4_PRED_VR, 1, 1, 1},
-    { I4_PRED_HD, 1, 1, 1},
-    { I4_PRED_VL, 0, 1, 0},
-    { I4_PRED_HU, 1, 0, 0},
+  {  I4_PRED_V, 0, 1, 0},
+  {  I4_PRED_H, 1, 0, 0},
+  {          0, 0, 0, 0},
+  {I4_PRED_DDL, 0, 1, 0},
+  {I4_PRED_DDR, 1, 1, 1},
+  { I4_PRED_VR, 1, 1, 1},
+  { I4_PRED_HD, 1, 1, 1},
+  { I4_PRED_VL, 0, 1, 0},
+  { I4_PRED_HU, 1, 0, 0},
 };
 
 static const uint8_t g_kuiI16CbpTable[6] = {0, 16, 32, 15, 31, 47};
@@ -233,31 +233,80 @@
 
 
 typedef struct TagPartMbInfo {
-    MbType iType;
-    int8_t iPartCount; //P_16*16, P_16*8, P_8*16, P_8*8 based on 8*8 block; P_8*4, P_4*8, P_4*4 based on 4*4 block
-    int8_t iPartWidth; //based on 4*4 block
+  MbType iType;
+  int8_t iPartCount; //P_16*16, P_16*8, P_8*16, P_8*8 based on 8*8 block; P_8*4, P_4*8, P_4*4 based on 4*4 block
+  int8_t iPartWidth; //based on 4*4 block
 } SPartMbInfo;
-static const SPartMbInfo g_ksInterMbTypeInfo[5] = {
-    {MB_TYPE_16x16,    1, 4},
-    {MB_TYPE_16x8,     2, 4},
-    {MB_TYPE_8x16,     2, 2},
-    {MB_TYPE_8x8,      4, 4},
-    {MB_TYPE_8x8_REF0, 4, 4}, //ref0--ref_idx not present in bit-stream and default as 0
+
+//Table 7.13. Macroblock type values 0 to 4 for P slices.
+static const SPartMbInfo g_ksInterPMbTypeInfo[5] = {
+  {MB_TYPE_16x16,    1, 4},
+  {MB_TYPE_16x8,     2, 4},
+  {MB_TYPE_8x16,     2, 2},
+  {MB_TYPE_8x8,      4, 4},
+  {MB_TYPE_8x8_REF0, 4, 4}, //ref0--ref_idx not present in bit-stream and default as 0
 };
-static const SPartMbInfo g_ksInterSubMbTypeInfo[4] = {
-    {SUB_MB_TYPE_8x8, 1, 2},
-    {SUB_MB_TYPE_8x4, 2, 2},
-    {SUB_MB_TYPE_4x8, 2, 1},
-    {SUB_MB_TYPE_4x4, 4, 1},
+
+//Table 7.14. Macroblock type values 0 to 22 for B slices.
+static const SPartMbInfo g_ksInterBMbTypeInfo[] = {
+  //            Part 0        Part 1
+  { MB_TYPE_DIRECT, 1, 4 }, //B_Direct_16x16
+  { MB_TYPE_16x16 | MB_TYPE_P0L0, 1, 4 }, //B_L0_16x16
+  { MB_TYPE_16x16 | MB_TYPE_P0L1, 1, 4 }, //B_L1_16x16
+  { MB_TYPE_16x16 | MB_TYPE_P0L0 | MB_TYPE_P0L1, 1, 4 },  //B_Bi_16x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, 4 },    //B_L0_L0_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P1L0, 2, 2 },    //B_L0_L0_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 4 },    //B_L1_L1_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 2 },    //B_L1_L1_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, 4 },    //B_L0_L1_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P1L1, 2, 2 },    //B_L0_L1_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 4 },    //B_L1_L0_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 2 },    //B_L1_L0_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 4 },   //B_L0_Bi_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 2 },   //B_L0_Bi_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 4 },   //B_L1_Bi_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 2 },   //B_L1_Bi_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 4 },   //B_Bi_L0_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0, 2, 2 },   //B_Bi_L0_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 4 },   //B_Bi_L1_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L1, 2, 2 },   //B_Bi_L1_8x16
+  { MB_TYPE_16x8  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 4 },    //B_Bi_Bi_16x8
+  { MB_TYPE_8x16  | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1, 2, 2 },    //B_Bi_Bi_8x16
+  { MB_TYPE_8x8   | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_P1L0 | MB_TYPE_P1L1,  4, 4 }    //B_8x8
 };
 
+//Table 7.17 - Sub-macroblock types in P macroblocks.
+static const SPartMbInfo g_ksInterPSubMbTypeInfo[4] = {
+  {SUB_MB_TYPE_8x8, 1, 2},
+  {SUB_MB_TYPE_8x4, 2, 2},
+  {SUB_MB_TYPE_4x8, 2, 1},
+  {SUB_MB_TYPE_4x4, 4, 1},
+};
+
+//Table 7.18 - Sub-macroblock types in B macroblocks.
+static const SPartMbInfo g_ksInterBSubMbTypeInfo[] = {
+  { MB_TYPE_DIRECT,                               1, 2 }, //B_Direct_8x8
+  { SUB_MB_TYPE_8x8 | MB_TYPE_P0L0,                 1, 2 }, //B_L0_8x8
+  { SUB_MB_TYPE_8x8 | MB_TYPE_P0L1,                 1, 2 }, //B_L1_8x8
+  { SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1,  1, 2 }, //B_Bi_8x8
+  { SUB_MB_TYPE_8x4 | MB_TYPE_P0L0,                 2, 2 }, //B_L0_8x4
+  { SUB_MB_TYPE_4x8 | MB_TYPE_P0L0,                 2, 1 }, //B_L0_4x8
+  { SUB_MB_TYPE_8x4 | MB_TYPE_P0L1,                 2, 2 }, //B_L1_8x4
+  { SUB_MB_TYPE_4x8 | MB_TYPE_P0L1,                 2, 1 }, //B_L1_4x8
+  { SUB_MB_TYPE_8x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1,  2, 2 }, //B_Bi_8x4
+  { SUB_MB_TYPE_4x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1,  2, 1 }, //B_Bi_4x8
+  { SUB_MB_TYPE_4x4 | MB_TYPE_P0L0,                 4, 1 }, //B_L0_4x4
+  { SUB_MB_TYPE_4x4 | MB_TYPE_P0L1,                 4, 1 }, //B_L1_4x4
+  { SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1,  4, 1 }  //B_Bi_4x4
+};
+
 typedef struct TagSar {
   uint32_t uiWidth;
   uint32_t uiHeight;
 } sSar;
 static const sSar g_ksVuiSampleAspectRatio[17] = { //Table E-1
-  { 0,  0}, { 1,  1}, {12, 11}, { 10, 11}, {16,11}, //0~4
-  {40, 33}, {24, 11}, {20, 11}, { 32, 11}, {80,33}, //5~9
+  { 0,  0}, { 1,  1}, {12, 11}, { 10, 11}, {16, 11}, //0~4
+  {40, 33}, {24, 11}, {20, 11}, { 32, 11}, {80, 33}, //5~9
   {18, 11}, {15, 11}, {64, 33}, {160, 99}, { 4, 3}, //10~14
   { 3,  2}, { 2,  1}                                //15~16
 };
--- a/codec/decoder/core/src/au_parser.cpp
+++ b/codec/decoder/core/src/au_parser.cpp
@@ -46,6 +46,8 @@
 #include "bit_stream.h"
 #include "memory_align.h"
 
+#define _PARSE_NALHRD_VCLHRD_PARAMS_ 1
+
 namespace WelsDec {
 /*!
  *************************************************************************************
@@ -1558,16 +1560,53 @@
   }
   WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //nal_hrd_parameters_present_flag
   pVui->bNalHrdParamPresentFlag = !!uiCode;
-  if (pVui->bNalHrdParamPresentFlag) { //HRD parse not supported
+  if (pVui->bNalHrdParamPresentFlag) { //Parse and discard hrd_parameters(); the values are not used by the decoder.
+#ifdef _PARSE_NALHRD_VCLHRD_PARAMS_
+    WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //cpb_cnt_minus1
+    const int32_t cpb_cnt_minus1 = (int32_t)uiCode; //take the parsed value from uiCode, like the other reads here
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 4, &uiCode)); //bit_rate_scale
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 4, &uiCode)); //cpb_size_scale
+    for (int32_t i = 0; i <= cpb_cnt_minus1; i++) {
+      WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //bit_rate_value_minus1[i]
+      WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //cpb_size_value_minus1[i]
+      WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //cbr_flag[i]
+    }
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //initial_cpb_removal_delay_length_minus1
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //cpb_removal_delay_length_minus1
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //dpb_output_delay_length_minus1
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //time_offset_length
+#else
     WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "nal_hrd_parameters_present_flag = 1 not supported.");
     return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_VUI_HRD);
+#endif
   }
   WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //vcl_hrd_parameters_present_flag
   pVui->bVclHrdParamPresentFlag = !!uiCode;
-  if (pVui->bVclHrdParamPresentFlag) { //HRD parse not supported
+  if (pVui->bVclHrdParamPresentFlag) { //Parse and discard hrd_parameters(); the values are not used by the decoder.
+#ifdef _PARSE_NALHRD_VCLHRD_PARAMS_
+    WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //cpb_cnt_minus1
+    const int32_t cpb_cnt_minus1 = (int32_t)uiCode; //take the parsed value from uiCode, like the other reads here
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 4, &uiCode)); //bit_rate_scale
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 4, &uiCode)); //cpb_size_scale
+    for (int32_t i = 0; i <= cpb_cnt_minus1; i++) {
+      WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //bit_rate_value_minus1[i]
+      WELS_READ_VERIFY (BsGetUe (pBsAux, &uiCode)); //cpb_size_value_minus1[i]
+      WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //cbr_flag[i]
+    }
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //initial_cpb_removal_delay_length_minus1
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //cpb_removal_delay_length_minus1
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //dpb_output_delay_length_minus1
+    WELS_READ_VERIFY (BsGetBits (pBsAux, 5, &uiCode)); //time_offset_length
+#else
     WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "vcl_hrd_parameters_present_flag = 1 not supported.");
     return GENERATE_ERROR_NO (ERR_LEVEL_PARAM_SETS, ERR_INFO_UNSUPPORTED_VUI_HRD);
+#endif
   }
+#ifdef _PARSE_NALHRD_VCLHRD_PARAMS_
+  if (pVui->bNalHrdParamPresentFlag || pVui->bVclHrdParamPresentFlag) {
+    WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //low_delay_hrd_flag
+  }
+#endif
   WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //pic_struct_present_flag
   pVui->bPicStructPresentFlag = !!uiCode;
   WELS_READ_VERIFY (BsGetOneBit (pBsAux, &uiCode)); //bitstream_restriction_flag
--- a/codec/decoder/core/src/deblocking.cpp
+++ b/codec/decoder/core/src/deblocking.cpp
@@ -1,1003 +1,1383 @@
-/*!
- * \copy
- *     Copyright (c)  2010-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file    deblocking.c
- *
- * \brief   Interfaces introduced in frame deblocking filtering
- *
- * \date    08/02/2010
- *
- *************************************************************************************
- */
-
-#include "deblocking.h"
-#include "deblocking_common.h"
-#include "cpu_core.h"
-
-namespace WelsDec {
-
-#define NO_SUPPORTED_FILTER_IDX     (-1)
-#define LEFT_FLAG_BIT 0
-#define TOP_FLAG_BIT 1
-#define LEFT_FLAG_MASK 0x01
-#define TOP_FLAG_MASK 0x02
-
-#define SAME_MB_DIFF_REFIDX
-#define g_kuiAlphaTable(x) g_kuiAlphaTable[(x)+12]
-#define g_kiBetaTable(x)  g_kiBetaTable[(x)+12]
-#define g_kiTc0Table(x)   g_kiTc0Table[(x)+12]
-
-#define MB_BS_MV(iRefIndex, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \
-(\
-    ( iRefIndex[iMbXy][iIndex] - iRefIndex[iMbBn][iNeighIndex] )||\
-    ( WELS_ABS( iMotionVector[iMbXy][iIndex][0] - iMotionVector[iMbBn][iNeighIndex][0] ) >= 4 ) ||\
-    ( WELS_ABS( iMotionVector[iMbXy][iIndex][1] - iMotionVector[iMbBn][iNeighIndex][1] ) >= 4 )\
-)
-
-#if defined(SAME_MB_DIFF_REFIDX)
-#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \
-(\
-    ( iRefIndex[iIndex] - iRefIndex[iNeighIndex] )||(\
-    ( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |\
-    ( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
-)
-#else
-#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \
-(\
-    !!(( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
-)
-#endif
-
-#define BS_EDGE(bsx1, iRefIndex, iMotionVector, iIndex, iNeighIndex) \
-( (bsx1|SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1)))
-
-#define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \
-{\
-  iIndex = (iQp + iAlphaOffset);\
-  iAlpha = g_kuiAlphaTable(iIndex);\
-  iBeta  = g_kiBetaTable((iQp + iBetaOffset));\
-}
-
-static const uint8_t g_kuiAlphaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
-  7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
-  25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
-  80, 90, 101, 113, 127, 144, 162, 182, 203, 226,
-  255, 255
-  , 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255
-};
-
-static const int8_t g_kiBetaTable[52 + 24] = { //this table refers to Table 8-16 in H.264/AVC standard
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0,
-  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
-  0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
-  3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
-  8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
-  13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
-  18, 18
-  , 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18
-};
-
-static const int8_t g_kiTc0Table[52 + 24][4] = { //this table refers Table 8-17 in H.264/AVC standard
-  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
-  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
-  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
-  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
-  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 1 },
-  { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 },
-  { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 },
-  { -1, 1, 1, 2 }, { -1, 1, 2, 3 }, { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 },
-  { -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 }, { -1, 4, 5, 7 }, { -1, 4, 5, 8 },
-  { -1, 4, 6, 9 }, { -1, 5, 7, 10 }, { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 },
-  { -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 }
-  , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
-  , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }
-};
-
-static const uint8_t g_kuiTableBIdx[2][8] = {
-  {
-    0,  4,  8,  12,
-    3,  7,  11, 15
-  },
-
-  {
-    0,  1,  2,  3,
-    12, 13, 14, 15
-  },
-};
-
-static const uint8_t g_kuiTableB8x8Idx[2][16] = {
-  {
-    0,  1,  4,  5,  8,  9,  12, 13,   // 0   1 |  2  3
-    2,  3,  6,  7, 10, 11,  14, 15    // 4   5 |  6  7
-  },                                  // ------------
-  // 8   9 | 10 11
-  {
-    // 12 13 | 14 15
-    0,  1,  4,  5,  2,  3,  6,  7,
-    8,  9,  12, 13, 10, 11, 14, 15
-  },
-};
-
-#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \
-{\
-  tc[0] = g_kiTc0Table(iIndexA)[pBS[0]] + bChroma;\
-  tc[1] = g_kiTc0Table(iIndexA)[pBS[1]] + bChroma;\
-  tc[2] = g_kiTc0Table(iIndexA)[pBS[2]] + bChroma;\
-  tc[3] = g_kiTc0Table(iIndexA)[pBS[3]] + bChroma;\
-}
-
-void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
-  uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
-
-  uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
-  uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
-  uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
-  uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
-
-  nBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor;
-  nBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor;
-  nBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor;
-
-  nBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor;
-  nBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor;
-  nBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor;
-  * (uint32_t*)nBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor;
-
-  nBS[0][1][2] = (pNnzTab[8]  | pNnzTab[9])  << iLShiftFactor;
-  nBS[0][2][2] = (pNnzTab[9]  | pNnzTab[10]) << iLShiftFactor;
-  nBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor;
-  * (uint32_t*)nBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;
-
-  nBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor;
-  nBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor;
-  nBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor;
-  * (uint32_t*)nBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor;
-}
-
-void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
-  int8_t i8x8NnzTab[4];
-  for (int32_t i = 0; i < 4; i++) {
-    int32_t iBlkIdx = i << 2;
-    i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
-                     pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
-  }
-
-  //vertical
-  nBS[0][2][0] = nBS[0][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[1]) << iLShiftFactor;
-  nBS[0][2][2] = nBS[0][2][3] = (i8x8NnzTab[2] | i8x8NnzTab[3]) << iLShiftFactor;
-  //horizontal
-  nBS[1][2][0] = nBS[1][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[2]) << iLShiftFactor;
-  nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor;
-}
-
-void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
-    int32_t iMbXy) {
-  uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
-  int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
-  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
-
-  int8_t i8x8NnzTab[4];
-
-  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-    for (int32_t i = 0; i < 4; i++) {
-      int32_t iBlkIdx = i << 2;
-      i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
-                       pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
-    }
-    //vertical
-    nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
-                                           g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]);
-    nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
-                                           g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
-
-    //horizontal
-    nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
-                                           g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
-    nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
-                                           g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
-  } else {
-    uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
-    uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
-    uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
-    uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
-
-    for (int i = 0; i < 3; i++)
-      uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
-    nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
-    nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
-    nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
-
-    for (int i = 0; i < 3; i++)
-      uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
-    nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
-    nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
-    nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
-
-    for (int i = 0; i < 3; i++)
-      uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
-    nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
-    nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
-    nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
-
-    for (int i = 0; i < 3; i++)
-      uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
-    nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
-    nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
-    nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
-
-    // horizontal
-    * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1);
-    nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
-    nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
-    nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
-    nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
-
-    * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
-    nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
-    nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
-    nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
-    nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
-
-    * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
-    nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
-    nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
-    nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
-    nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
-  }
-}
-
-uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
-  int32_t i, j;
-  uint32_t uiBSx4;
-  uint8_t* pBS = (uint8_t*) (&uiBSx4);
-  const uint8_t* pBIdx      = &g_kuiTableBIdx[iEdge][0];
-  const uint8_t* pBnIdx     = &g_kuiTableBIdx[iEdge][4];
-  const uint8_t* pB8x8Idx   = &g_kuiTableB8x8Idx[iEdge][0];
-  const uint8_t* pBn8x8Idx  = &g_kuiTableB8x8Idx[iEdge][8];
-
-  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
-    for (i = 0; i < 2; i++) {
-      uint8_t uiNzc = 0;
-      for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
-      }
-      if (uiNzc) {
-        pBS[i << 1] = pBS[1 + (i << 1)] = 2;
-      } else {
-        pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb,
-                                          *pB8x8Idx, *pBn8x8Idx);
-      }
-      pB8x8Idx += 4;
-      pBn8x8Idx += 4;
-    }
-  } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-    for (i = 0; i < 2; i++) {
-      uint8_t uiNzc = 0;
-      for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
-      }
-      for (j = 0; j < 2; j++) {
-        if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
-          pBS[j + (i << 1)] = 2;
-        } else {
-          pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx,
-                                        *pBnIdx);
-        }
-        pBnIdx++;
-      }
-      pB8x8Idx += 4;
-    }
-  } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
-    for (i = 0; i < 2; i++) {
-      uint8_t uiNzc = 0;
-      for (j = 0; uiNzc == 0 && j < 4; j++) {
-        uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
-      }
-      for (j = 0; j < 2; j++) {
-        if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
-          pBS[j + (i << 1)] = 2;
-        } else {
-          pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
-                                        *pBn8x8Idx);
-        }
-        pBIdx++;
-      }
-      pBn8x8Idx += 4;
-    }
-  } else {
-    // only 4x4 transform
-    for (i = 0; i < 4; i++) {
-      if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
-        pBS[i] = 2;
-      } else {
-        pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
-                           *pBnIdx);
-      }
-      pBIdx++;
-      pBnIdx++;
-    }
-  }
-
-  return uiBSx4;
-}
-int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc) {
-  int32_t iMbY = pCurDqLayer->iMbY;
-  int32_t iMbX = pCurDqLayer->iMbX;
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  bool bLeftFlag = false;
-  bool bTopFlag  = false;
-
-  if (2 == iFilterIdc) {
-    bLeftFlag = (iMbX > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - 1]);
-    bTopFlag  = (iMbY > 0) && (pCurDqLayer->pSliceIdc[iMbXy] == pCurDqLayer->pSliceIdc[iMbXy - pCurDqLayer->iMbWidth]);
-  } else { //if ( 0 == iFilterIdc )
-    bLeftFlag = (iMbX > 0);
-    bTopFlag  = (iMbY > 0);
-  }
-  return (bLeftFlag << LEFT_FLAG_BIT) | (bTopFlag << TOP_FLAG_BIT);
-}
-
-void FilteringEdgeLumaH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
-  int32_t iIndexA;
-  int32_t iAlpha;
-  int32_t iBeta;
-  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
-
-  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                          iBeta);
-
-  if (iAlpha | iBeta) {
-    TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
-    pFilter->pLoopf->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, tc);
-  }
-  return;
-}
-
-
-void FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
-  int32_t  iIndexA;
-  int32_t  iAlpha;
-  int32_t  iBeta;
-  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
-
-  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                          iBeta);
-
-  if (iAlpha | iBeta) {
-    TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0);
-    pFilter->pLoopf->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, tc);
-  }
-  return;
-}
-
-
-void FilteringEdgeLumaIntraH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
-  int32_t iIndexA;
-  int32_t iAlpha;
-  int32_t iBeta;
-
-  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                          iBeta);
-
-  if (iAlpha | iBeta) {
-    pFilter->pLoopf->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta);
-  }
-  return;
-}
-
-void FilteringEdgeLumaIntraV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
-  int32_t iIndexA;
-  int32_t iAlpha;
-  int32_t iBeta;
-
-  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                          iBeta);
-
-  if (iAlpha | iBeta) {
-    pFilter->pLoopf->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta);
-  }
-  return;
-}
-void FilteringEdgeChromaH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
-                           uint8_t* pBS) {
-  int32_t iIndexA;
-  int32_t iAlpha;
-  int32_t iBeta;
-  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
-  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
-
-    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                            iBeta);
-
-    if (iAlpha | iBeta) {
-      TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
-      pFilter->pLoopf->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
-    }
-  } else {
-
-    for (int i = 0; i < 2; i++) {
-
-
-      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                              iBeta);
-
-      if (iAlpha | iBeta) {
-        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
-        TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
-        pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
-      }
-
-
-
-    }
-
-  }
-  return;
-}
-void FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
-                           uint8_t* pBS) {
-  int32_t iIndexA;
-  int32_t iAlpha;
-  int32_t iBeta;
-  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
-  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
-
-
-    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                            iBeta);
-
-    if (iAlpha | iBeta) {
-      TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
-      pFilter->pLoopf->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
-    }
-
-
-  } else {
-
-    for (int i = 0; i < 2; i++) {
-
-      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                              iBeta);
-
-      if (iAlpha | iBeta) {
-        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
-        TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
-        pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
-      }
-
-
-    }
-  }
-  return;
-}
-
-void FilteringEdgeChromaIntraH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
-                                uint8_t* pBS) {
-  int32_t iIndexA;
-  int32_t iAlpha;
-  int32_t iBeta;
-  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
-
-    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                            iBeta);
-
-    if (iAlpha | iBeta) {
-      pFilter->pLoopf->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta);
-    }
-  } else {
-
-    for (int i = 0; i < 2; i++) {
-
-      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                              iBeta);
-
-      if (iAlpha | iBeta) {
-        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
-        pFilter->pLoopf->pfChromaDeblockingEQ4Ver2 (pPixCbCr, iStride, iAlpha, iBeta);
-      }
-
-    }
-  }
-  return;
-}
-
-void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
-                                uint8_t* pBS) {
-  int32_t iIndexA;
-  int32_t iAlpha;
-  int32_t iBeta;
-  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { // QP of cb and cr are the same
-
-
-
-
-    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                            iBeta);
-    if (iAlpha | iBeta) {
-      pFilter->pLoopf->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta);
-    }
-  } else {
-
-    for (int i = 0; i < 2; i++) {
-
-
-      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                              iBeta);
-      if (iAlpha | iBeta) {
-        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr;
-        pFilter->pLoopf->pfChromaDeblockingEQ4Hor2 (pPixCbCr, iStride, iAlpha, iBeta);
-      }
-    }
-
-  }
-  return;
-}
-
-
-void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, uint8_t nBS[2][4][4],
-                        int32_t iBoundryFlag) {
-  int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
-  int32_t iMbX = pCurDqLayer->iMbX;
-  int32_t iMbY = pCurDqLayer->iMbY;
-
-  int32_t iCurLumaQp = pCurDqLayer->pLumaQp[iMbXyIndex];
-  //int32_t* iCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex];
-  int8_t* pCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex];
-  int32_t iLineSize   = pFilter->iCsStride[0];
-  int32_t iLineSizeUV = pFilter->iCsStride[1];
-
-  uint8_t* pDestY, * pDestCb, * pDestCr;
-  pDestY  = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
-  pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSizeUV + iMbX) << 3);
-  pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSizeUV + iMbX) << 3);
-
-//Vertical margrin
-  if (iBoundryFlag & LEFT_FLAG_MASK) {
-    int32_t iLeftXyIndex = iMbXyIndex - 1;
-    pFilter->iLumaQP   = (iCurLumaQp + pCurDqLayer->pLumaQp[iLeftXyIndex] + 1) >> 1;
-    for (int i = 0; i < 2; i++) {
-      pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iLeftXyIndex][i] + 1) >> 1;
-    }
-    if (nBS[0][0][0] == 0x04) {
-      FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
-      FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
-    } else {
-      if (* (uint32_t*)nBS[0][0] != 0) {
-        FilteringEdgeLumaV (pFilter, pDestY, iLineSize, nBS[0][0]);
-        FilteringEdgeChromaV (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[0][0]);
-      }
-    }
-  }
-
-  pFilter->iLumaQP = iCurLumaQp;
-  pFilter->iChromaQP[0] = pCurChromaQp[0];
-  pFilter->iChromaQP[1] = pCurChromaQp[1];
-
-  if (* (uint32_t*)nBS[0][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-    FilteringEdgeLumaV (pFilter, &pDestY[1 << 2], iLineSize, nBS[0][1]);
-  }
-
-  if (* (uint32_t*)nBS[0][2] != 0) {
-    FilteringEdgeLumaV (pFilter, &pDestY[2 << 2], iLineSize, nBS[0][2]);
-    FilteringEdgeChromaV (pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, nBS[0][2]);
-  }
-
-  if (* (uint32_t*)nBS[0][3] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-    FilteringEdgeLumaV (pFilter, &pDestY[3 << 2], iLineSize, nBS[0][3]);
-  }
-
-  if (iBoundryFlag & TOP_FLAG_MASK) {
-    int32_t iTopXyIndex = iMbXyIndex - pCurDqLayer->iMbWidth;
-    pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iTopXyIndex] + 1) >> 1;
-    for (int i = 0; i < 2; i++) {
-      pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iTopXyIndex][i] + 1) >> 1;
-    }
-
-    if (nBS[1][0][0] == 0x04) {
-      FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
-      FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
-    } else {
-      if (* (uint32_t*)nBS[1][0] != 0) {
-        FilteringEdgeLumaH (pFilter, pDestY, iLineSize, nBS[1][0]);
-        FilteringEdgeChromaH (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[1][0]);
-      }
-    }
-  }
-
-  pFilter->iLumaQP = iCurLumaQp;
-  pFilter->iChromaQP[0] = pCurChromaQp[0];
-  pFilter->iChromaQP[1] = pCurChromaQp[1];
-
-  if (* (uint32_t*)nBS[1][1] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-    FilteringEdgeLumaH (pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, nBS[1][1]);
-  }
-
-  if (* (uint32_t*)nBS[1][2] != 0) {
-    FilteringEdgeLumaH (pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, nBS[1][2]);
-    FilteringEdgeChromaH (pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV], iLineSizeUV,
-                          nBS[1][2]);
-  }
-
-  if (* (uint32_t*)nBS[1][3] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-    FilteringEdgeLumaH (pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, nBS[1][3]);
-  }
-}
-
-void FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
-  int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
-  int32_t iMbX      = pCurDqLayer->iMbX;
-  int32_t iMbY      = pCurDqLayer->iMbY;
-  int32_t iMbWidth  = pCurDqLayer->iMbWidth;
-  int32_t iLineSize  = pFilter->iCsStride[0];
-
-  uint8_t*  pDestY;
-  int32_t  iCurQp;
-  int32_t  iIndexA, iAlpha, iBeta;
-
-  ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
-  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
-
-  pDestY  = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
-  iCurQp  = pCurDqLayer->pLumaQp[iMbXyIndex];
-
-  * (uint32_t*)uiBSx4 = 0x03030303;
-
-  // luma v
-  if (iBoundryFlag & LEFT_FLAG_MASK) {
-    pFilter->iLumaQP   = (iCurQp   + pCurDqLayer->pLumaQp[iMbXyIndex - 1] + 1) >> 1;
-    FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
-  }
-
-  pFilter->iLumaQP   = iCurQp;
-  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                          iBeta);
-  if (iAlpha | iBeta) {
-    TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 0);
-
-    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-      pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
-    }
-
-    pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
-
-    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-      pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
-    }
-  }
-
-  // luma h
-  if (iBoundryFlag & TOP_FLAG_MASK) {
-    pFilter->iLumaQP   = (iCurQp   + pCurDqLayer->pLumaQp[iMbXyIndex - iMbWidth] + 1) >> 1;
-    FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
-  }
-
-  pFilter->iLumaQP   = iCurQp;
-  if (iAlpha | iBeta) {
-    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-      pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
-    }
-
-    pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
-
-    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
-      pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
-    }
-  }
-}
-void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
-  int32_t iMbXyIndex     = pCurDqLayer->iMbXyIndex;
-  int32_t iMbX      = pCurDqLayer->iMbX;
-  int32_t iMbY      = pCurDqLayer->iMbY;
-  int32_t iMbWidth  = pCurDqLayer->iMbWidth;
-  int32_t iLineSize  = pFilter->iCsStride[1];
-
-  uint8_t* pDestCb;
-  uint8_t* pDestCr;
-  //int32_t  iCurQp;
-  int8_t* pCurQp;
-  int32_t  iIndexA, iAlpha, iBeta;
-
-  ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
-  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
-
-  pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3);
-  pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3);
-  pCurQp  = pCurDqLayer->pChromaQp[iMbXyIndex];
-
-  * (uint32_t*)uiBSx4 = 0x03030303;
-
-
-// chroma v
-  if (iBoundryFlag & LEFT_FLAG_MASK) {
-
-    for (int i = 0; i < 2; i++) {
-      pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - 1][i] + 1) >> 1;
-
-    }
-    FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSize, NULL);
-  }
-
-  pFilter->iChromaQP[0]   = pCurQp[0];
-  pFilter->iChromaQP[1]   = pCurQp[1];
-  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
-    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                            iBeta);
-    if (iAlpha | iBeta) {
-      TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
-      pFilter->pLoopf->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc);
-    }
-  } else {
-
-    for (int i = 0; i < 2; i++) {
-      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                              iBeta);
-      if (iAlpha | iBeta) {
-        uint8_t* pDestCbCr = (i == 0) ? &pDestCb[2 << 1] : &pDestCr[2 << 1];
-        TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
-        pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pDestCbCr, iLineSize, iAlpha, iBeta, iTc);
-      }
-
-    }
-  }
-
-  // chroma h
-
-  if (iBoundryFlag & TOP_FLAG_MASK) {
-    for (int i = 0; i < 2; i++) {
-      pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - iMbWidth][i] + 1) >> 1;
-    }
-    FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSize, NULL);
-  }
-
-  pFilter->iChromaQP[0]   = pCurQp[0];
-  pFilter->iChromaQP[1]   = pCurQp[1];
-
-  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
-    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                            iBeta);
-    if (iAlpha | iBeta) {
-      TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
-      pFilter->pLoopf->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize,
-          iAlpha, iBeta, iTc);
-    }
-  } else {
-    for (int i = 0; i < 2; i++) {
-
-      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
-                              iBeta);
-      if (iAlpha | iBeta) {
-        TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
-        uint8_t* pDestCbCr = (i == 0) ? &pDestCb[ (2 << 1) * iLineSize] : &pDestCr[ (2 << 1) * iLineSize];
-        pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pDestCbCr, iLineSize,
-            iAlpha, iBeta, iTc);
-      }
-    }
-
-
-  }
-}
-
-// merge h&v lookup table operation to save performance
-void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
-  FilteringEdgeLumaHV (pCurDqLayer, pFilter, iBoundryFlag);
-  FilteringEdgeChromaHV (pCurDqLayer, pFilter, iBoundryFlag);
-}
-
-void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
-  uint8_t nBS[2][4][4] = {{{ 0 }}};
-
-  int32_t iMbXyIndex  = pCurDqLayer->iMbXyIndex;
-  int32_t iCurMbType  = pCurDqLayer->pMbType[iMbXyIndex];
-  int32_t iMbNb;
-
-  switch (iCurMbType) {
-  case MB_TYPE_INTRA4x4:
-  case MB_TYPE_INTRA8x8:
-  case MB_TYPE_INTRA16x16:
-  case MB_TYPE_INTRA_PCM:
-    DeblockingIntraMb (pCurDqLayer, pFilter, iBoundryFlag);
-    break;
-  default:
-
-    if (iBoundryFlag & LEFT_FLAG_MASK) {
-      iMbNb = iMbXyIndex - 1;
-      * (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
-                                 pCurDqLayer, 0, iMbNb, iMbXyIndex);
-    } else {
-      * (uint32_t*)nBS[0][0] = 0;
-    }
-    if (iBoundryFlag & TOP_FLAG_MASK) {
-      iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth;
-      * (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
-                                 pCurDqLayer, 1, iMbNb, iMbXyIndex);
-    } else {
-      * (uint32_t*)nBS[1][0] = 0;
-    }
-    //SKIP MB_16x16 or others
-    if (iCurMbType != MB_TYPE_SKIP) {
-      if (iCurMbType == MB_TYPE_16x16) {
-        if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
-          DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
-        } else {
-          DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
-        }
-      } else {
-        DeblockingBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
-      }
-    } else {
-      * (uint32_t*)nBS[0][1] = * (uint32_t*)nBS[0][2] = * (uint32_t*)nBS[0][3] =
-                                 * (uint32_t*)nBS[1][1] = * (uint32_t*)nBS[1][2] = * (uint32_t*)nBS[1][3] = 0;
-    }
-    DeblockingInterMb (pCurDqLayer, pFilter, nBS, iBoundryFlag);
-    break;
-  }
-}
-
-/*!
- * \brief   AVC slice deblocking filtering target layer
- *
- * \param   dec         Wels avc decoder context
- *
- * \return  NONE
- */
-void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb) {
-  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
-  PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
-  int32_t iMbWidth  = pCurDqLayer->iMbWidth;
-  int32_t iTotalMbCount = pSliceHeaderExt->sSliceHeader.pSps->uiTotalMbCount;
-
-  SDeblockingFilter pFilter;
-  memset (&pFilter, 0, sizeof (pFilter));
-  PFmo pFmo = pCtx->pFmo;
-  int32_t iNextMbXyIndex = 0;
-  int32_t iTotalNumMb = pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice;
-  int32_t iCountNumMb = 0;
-  int32_t iBoundryFlag;
-  int32_t iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc;
-
-  /* Step1: parameters set */
-  pFilter.pCsData[0] = pCtx->pDec->pData[0];
-  pFilter.pCsData[1] = pCtx->pDec->pData[1];
-  pFilter.pCsData[2] = pCtx->pDec->pData[2];
-
-  pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0];
-  pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1];
-
-  pFilter.eSliceType = (EWelsSliceType) pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType;
-
-  pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
-  pFilter.iSliceBetaOffset     = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
-
-  pFilter.pLoopf = &pCtx->sDeblockingFunc;
-
-  /* Step2: macroblock deblocking */
-  if (0 == iFilterIdc || 2 == iFilterIdc) {
-    iNextMbXyIndex = pSliceHeaderExt->sSliceHeader.iFirstMbInSlice;
-    pCurDqLayer->iMbX  = iNextMbXyIndex % iMbWidth;
-    pCurDqLayer->iMbY  = iNextMbXyIndex / iMbWidth;
-    pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
-
-    do {
-      iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc);
-
-      pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag);
-
-      ++iCountNumMb;
-      if (iCountNumMb >= iTotalNumMb) {
-        break;
-      }
-
-      if (pSliceHeaderExt->sSliceHeader.pPps->uiNumSliceGroups > 1) {
-        iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
-      } else {
-        ++iNextMbXyIndex;
-      }
-      if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbCount) { // slice group boundary or end of a frame
-        break;
-      }
-
-      pCurDqLayer->iMbX  = iNextMbXyIndex % iMbWidth;
-      pCurDqLayer->iMbY  = iNextMbXyIndex / iMbWidth;
-      pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
-    } while (1);
-  }
-}
-/*!
- * \brief   deblocking module initialize
- *
- * \param   pf
- *          cpu
- *
- * \return  NONE
- */
-
-void  DeblockingInit (SDeblockingFunc*  pFunc,  int32_t iCpu) {
-  pFunc->pfLumaDeblockingLT4Ver     = DeblockLumaLt4V_c;
-  pFunc->pfLumaDeblockingEQ4Ver     = DeblockLumaEq4V_c;
-  pFunc->pfLumaDeblockingLT4Hor     = DeblockLumaLt4H_c;
-  pFunc->pfLumaDeblockingEQ4Hor     = DeblockLumaEq4H_c;
-
-  pFunc->pfChromaDeblockingLT4Ver   = DeblockChromaLt4V_c;
-  pFunc->pfChromaDeblockingEQ4Ver   = DeblockChromaEq4V_c;
-  pFunc->pfChromaDeblockingLT4Hor   = DeblockChromaLt4H_c;
-  pFunc->pfChromaDeblockingEQ4Hor   = DeblockChromaEq4H_c;
-
-  pFunc->pfChromaDeblockingLT4Ver2  = DeblockChromaLt4V2_c;
-  pFunc->pfChromaDeblockingEQ4Ver2  = DeblockChromaEq4V2_c;
-  pFunc->pfChromaDeblockingLT4Hor2  = DeblockChromaLt4H2_c;
-  pFunc->pfChromaDeblockingEQ4Hor2  = DeblockChromaEq4H2_c;
-
-#ifdef X86_ASM
-  if (iCpu & WELS_CPU_SSSE3) {
-    pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_ssse3;
-    pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_ssse3;
-    pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_ssse3;
-    pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_ssse3;
-    pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
-    pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
-    pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
-    pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3;
-  }
-#endif
-
-#if defined(HAVE_NEON)
-  if (iCpu & WELS_CPU_NEON) {
-    pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_neon;
-    pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_neon;
-    pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_neon;
-    pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_neon;
-
-    pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon;
-    pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon;
-    pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon;
-    pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
-  }
-#endif
-
-#if defined(HAVE_NEON_AARCH64)
-  if (iCpu & WELS_CPU_NEON) {
-    pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_AArch64_neon;
-    pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_AArch64_neon;
-    pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_AArch64_neon;
-    pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_AArch64_neon;
-
-    pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
-    pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
-    pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
-    pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
-  }
-#endif
-}
-
-} // namespace WelsDec
+/*!
+ * \copy
+ *     Copyright (c)  2010-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file    deblocking.c
+ *
+ * \brief   Interfaces introduced in frame deblocking filtering
+ *
+ * \date    08/02/2010
+ *
+ *************************************************************************************
+ */
+
+#include "deblocking.h"
+#include "deblocking_common.h"
+#include "cpu_core.h"
+
+namespace WelsDec {
+
+#define NO_SUPPORTED_FILTER_IDX     (-1)
+#define LEFT_FLAG_BIT 0 // bit position corresponding to LEFT_FLAG_MASK
+#define TOP_FLAG_BIT 1 // bit position corresponding to TOP_FLAG_MASK
+#define LEFT_FLAG_MASK 0x01 // == 1 << LEFT_FLAG_BIT
+#define TOP_FLAG_MASK 0x02 // == 1 << TOP_FLAG_BIT
+
+#define SAME_MB_DIFF_REFIDX // selects the SMB_EDGE_MV variant that also compares reference indices
+#define g_kuiAlphaTable(x) g_kuiAlphaTable[(x)+12] // function-like accessor: biases the index by 12 to skip the 12 leading padding entries of the 52 + 24 entry table
+#define g_kiBetaTable(x)  g_kiBetaTable[(x)+12] // same +12 bias for the beta table
+#define g_kiTc0Table(x)   g_kiTc0Table[(x)+12] // same +12 bias for the tc0 table (yields a row of 4 values)
+
+#define MB_BS_MV(iRefIndex, iMotionVector, iMbXy, iMbBn, iIndex, iNeighIndex) \
+(\
+    ( iRefIndex[iMbXy][iIndex] - iRefIndex[iMbBn][iNeighIndex] )||\
+    ( WELS_ABS( iMotionVector[iMbXy][iIndex][0] - iMotionVector[iMbBn][iNeighIndex][0] ) >= 4 ) ||\
+    ( WELS_ABS( iMotionVector[iMbXy][iIndex][1] - iMotionVector[iMbBn][iNeighIndex][1] ) >= 4 )\
+) // 1 when block iIndex of MB iMbXy and block iNeighIndex of MB iMbBn differ in reference index, or the WELS_ABS difference of either motion-vector component is >= 4; otherwise 0
+
+#if defined(SAME_MB_DIFF_REFIDX) // SAME_MB_DIFF_REFIDX is defined unconditionally earlier in this file, so this variant is the one compiled
+#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \
+(\
+    ( iRefIndex[iIndex] - iRefIndex[iNeighIndex] )||(\
+    ( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |\
+    ( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
+) // 1 when the two blocks of the same MB differ in reference index, or a WELS_ABS motion-vector component difference has any bit above the low two set (& ~3); otherwise 0
+#else // motion-vector-only variant: iRefIndex is not used
+#define SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex) \
+(\
+    !!(( WELS_ABS( iMotionVector[iIndex][0] - iMotionVector[iNeighIndex][0] ) &(~3) ) |( WELS_ABS( iMotionVector[iIndex][1] - iMotionVector[iNeighIndex][1] ) &(~3) ))\
+) // !! normalises the result to 0 or 1
+#endif // SAME_MB_DIFF_REFIDX
+
+#define BS_EDGE(bsx1, iRefIndex, iMotionVector, iIndex, iNeighIndex) \
+( (bsx1|SMB_EDGE_MV(iRefIndex, iMotionVector, iIndex, iNeighIndex))<<((uint8_t)(!!bsx1))) // bsx1 != 0: (bsx1 | SMB_EDGE_MV result) << 1; bsx1 == 0: the 0/1 SMB_EDGE_MV result. NOTE(review): bsx1 is expanded twice and is not parenthesized
+
+#define GET_ALPHA_BETA_FROM_QP(iQp, iAlphaOffset, iBetaOffset, iIndex, iAlpha, iBeta) \
+{\
+  iIndex = (iQp + iAlphaOffset);\
+  iAlpha = g_kuiAlphaTable(iIndex);\
+  iBeta  = g_kiBetaTable((iQp + iBetaOffset));\
+} // outputs: iIndex = iQp + iAlphaOffset, iAlpha = alpha table at iIndex, iBeta = beta table at iQp + iBetaOffset; expands to a bare { } block (not do { } while (0))
+
+static const uint8_t g_kuiAlphaTable[52 + 24] = { // refers to Table 8-16 in the H.264/AVC standard; 52 entries with 12 padding entries on each side, read through the g_kuiAlphaTable(x) macro (index + 12)
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0, // 12 leading padding entries (all 0)
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  4,  4,  5,  6,
+  7,  8,  9, 10, 12, 13, 15, 17, 20, 22,
+  25, 28, 32, 36, 40, 45, 50, 56, 63, 71,
+  80, 90, 101, 113, 127, 144, 162, 182, 203, 226,
+  255, 255
+  , 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 // 12 trailing padding entries (repeat the last value, 255)
+};
+
+static const int8_t g_kiBetaTable[52 + 24] = { // refers to Table 8-16 in the H.264/AVC standard; 52 entries with 12 padding entries on each side, read through the g_kiBetaTable(x) macro (index + 12)
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0,  0, // 12 leading padding entries (all 0)
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  2,  2,  2,  3,
+  3,  3,  3,  4,  4,  4,  6,  6,  7,  7,
+  8,  8,  9,  9, 10, 10, 11, 11, 12, 12,
+  13, 13, 14, 14, 15, 15, 16, 16, 17, 17,
+  18, 18
+  , 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18 // 12 trailing padding entries (repeat the last value, 18)
+};
+
+static const int8_t g_kiTc0Table[52 + 24][4] = { // refers to Table 8-17 in the H.264/AVC standard; rows are read through the g_kiTc0Table(x) macro (index + 12), columns are indexed by the pBS[] value in TC0_TBL_LOOKUP
+  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, // first 6 of 12 leading padding rows
+  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, // last 6 of 12 leading padding rows
+  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
+  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 },
+  { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 0 }, { -1, 0, 0, 1 },
+  { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 0, 1 }, { -1, 0, 1, 1 }, { -1, 0, 1, 1 }, { -1, 1, 1, 1 },
+  { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 1 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 }, { -1, 1, 1, 2 },
+  { -1, 1, 1, 2 }, { -1, 1, 2, 3 }, { -1, 1, 2, 3 }, { -1, 2, 2, 3 }, { -1, 2, 2, 4 }, { -1, 2, 3, 4 },
+  { -1, 2, 3, 4 }, { -1, 3, 3, 5 }, { -1, 3, 4, 6 }, { -1, 3, 4, 6 }, { -1, 4, 5, 7 }, { -1, 4, 5, 8 },
+  { -1, 4, 6, 9 }, { -1, 5, 7, 10 }, { -1, 6, 8, 11 }, { -1, 6, 8, 13 }, { -1, 7, 10, 14 }, { -1, 8, 11, 16 },
+  { -1, 9, 12, 18 }, { -1, 10, 13, 20 }, { -1, 11, 15, 23 }, { -1, 13, 17, 25 }
+  , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 } // first 6 of 12 trailing padding rows (repeat the last row)
+  , { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 }, { -1, 13, 17, 25 } // last 6 of 12 trailing padding rows
+};
+
+static const uint8_t g_kuiTableBIdx[2][8] = { // row selected by iEdge in DeblockingBsMarginalMBAvcbase; entries 0..3 are reached via pBIdx, entries 4..7 via pBnIdx (presumably current-MB and neighbour-MB block indices - confirm against that function)
+  {
+    0,  4,  8,  12, // first column of a 4x4 raster-numbered grid
+    3,  7,  11, 15 // last column of that grid
+  },
+
+  {
+    0,  1,  2,  3, // first row of a 4x4 raster-numbered grid
+    12, 13, 14, 15 // last row of that grid
+  },
+};
+
+static const uint8_t g_kuiTableB8x8Idx[2][16] = { // row selected by iEdge; entries 0..7 index pNzc of the current MB (via pB8x8Idx), entries 8..15 index pNzc of the neighbour MB (via pBn8x8Idx)
+  {
+    0,  1,  4,  5,  8,  9,  12, 13,   // 0   1 |  2  3
+    2,  3,  6,  7, 10, 11,  14, 15    // 4   5 |  6  7
+  },                                  // ------------
+                                      // 8   9 | 10 11
+  {                                   // 12 13 | 14 15
+    // (diagram above: raster numbering of the sixteen 4x4 blocks, split into four 8x8 quadrants)
+    0,  1,  4,  5,  2,  3,  6,  7,
+    8,  9,  12, 13, 10, 11, 14, 15
+  },
+};
+
+#define TC0_TBL_LOOKUP(tc, iIndexA, pBS, bChroma) \
+{\
+  tc[0] = g_kiTc0Table(iIndexA)[pBS[0]] + bChroma;\
+  tc[1] = g_kiTc0Table(iIndexA)[pBS[1]] + bChroma;\
+  tc[2] = g_kiTc0Table(iIndexA)[pBS[2]] + bChroma;\
+  tc[3] = g_kiTc0Table(iIndexA)[pBS[3]] + bChroma;\
+} // fills tc[0..3] with the tc0 table value for row iIndexA and column pBS[i], plus bChroma; expands to a bare { } block
+
+void inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) { // fills the inner-edge entries nBS[0][1..3][*] and nBS[1][1..3][*] from the 16 bytes at pNnzTab; nBS[*][0][*] is not written here
+  uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
+
+  uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); // each word packs four consecutive pNnzTab bytes (type-punned 32-bit load)
+  uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
+  uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
+  uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
+
+  nBS[0][1][0] = (pNnzTab[0] | pNnzTab[1]) << iLShiftFactor; // nBS[0][e][r]: OR of the two entries on either side of edge e within group-of-four r
+  nBS[0][2][0] = (pNnzTab[1] | pNnzTab[2]) << iLShiftFactor;
+  nBS[0][3][0] = (pNnzTab[2] | pNnzTab[3]) << iLShiftFactor;
+
+  nBS[0][1][1] = (pNnzTab[4] | pNnzTab[5]) << iLShiftFactor;
+  nBS[0][2][1] = (pNnzTab[5] | pNnzTab[6]) << iLShiftFactor;
+  nBS[0][3][1] = (pNnzTab[6] | pNnzTab[7]) << iLShiftFactor;
+  * (uint32_t*)nBS[1][1] = (uiNnz32b0 | uiNnz32b1) << iLShiftFactor; // four nBS[1][1][*] bytes written at once; NOTE(review): the shift acts on the packed word, so a set top bit in one byte would spill into the next - presumably entries are small, confirm
+
+  nBS[0][1][2] = (pNnzTab[8]  | pNnzTab[9])  << iLShiftFactor;
+  nBS[0][2][2] = (pNnzTab[9]  | pNnzTab[10]) << iLShiftFactor;
+  nBS[0][3][2] = (pNnzTab[10] | pNnzTab[11]) << iLShiftFactor;
+  * (uint32_t*)nBS[1][2] = (uiNnz32b1 | uiNnz32b2) << iLShiftFactor;
+
+  nBS[0][1][3] = (pNnzTab[12] | pNnzTab[13]) << iLShiftFactor;
+  nBS[0][2][3] = (pNnzTab[13] | pNnzTab[14]) << iLShiftFactor;
+  nBS[0][3][3] = (pNnzTab[14] | pNnzTab[15]) << iLShiftFactor;
+  * (uint32_t*)nBS[1][3] = (uiNnz32b2 | uiNnz32b3) << iLShiftFactor;
+}
+
+void inline DeblockingBSInsideMBAvsbase8x8 (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) { // 8x8 variant: writes only the middle edge, nBS[0][2][*] and nBS[1][2][*]
+  int8_t i8x8NnzTab[4];
+  for (int32_t i = 0; i < 4; i++) { // OR together the four pNnzTab entries selected by g_kuiMbCountScan4Idx[4*i .. 4*i+3]
+    int32_t iBlkIdx = i << 2;
+    i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
+                     pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
+  }
+
+  //vertical
+  nBS[0][2][0] = nBS[0][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[1]) << iLShiftFactor; // groups 0 and 1 share the middle vertical edge
+  nBS[0][2][2] = nBS[0][2][3] = (i8x8NnzTab[2] | i8x8NnzTab[3]) << iLShiftFactor; // groups 2 and 3
+  //horizontal
+  nBS[1][2][0] = nBS[1][2][1] = (i8x8NnzTab[0] | i8x8NnzTab[2]) << iLShiftFactor; // groups 0 and 2 share the middle horizontal edge
+  nBS[1][2][2] = nBS[1][2][3] = (i8x8NnzTab[1] | i8x8NnzTab[3]) << iLShiftFactor; // groups 1 and 3
+}
+
+void static inline DeblockingBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
+    int32_t iMbXy) { // computes inner-edge nBS values for MB iMbXy via BS_EDGE, using pNnzTab plus the LIST_0 reference indices and motion vectors of that MB
+  uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
+  int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
+
+  int8_t i8x8NnzTab[4];
+
+  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // 8x8 transform: only the middle edge (index 2) of each direction is written
+    for (int32_t i = 0; i < 4; i++) { // OR together the four pNnzTab entries selected by g_kuiMbCountScan4Idx[4*i .. 4*i+3]
+      int32_t iBlkIdx = i << 2;
+      i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
+                       pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
+    }
+    //vertical
+    nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+                                           g_kuiMbCountScan4Idx[1 << 2], g_kuiMbCountScan4Idx[0]); // compares the first block of group 1 with the first block of group 0
+    nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+                                           g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[2 << 2]);
+
+    //horizontal
+    nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+                                           g_kuiMbCountScan4Idx[2 << 2], g_kuiMbCountScan4Idx[0]);
+    nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy],
+                                           g_kuiMbCountScan4Idx[3 << 2], g_kuiMbCountScan4Idx[1 << 2]);
+  } else { // otherwise: all three inner edges (1..3) in both directions
+    uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); // each word packs four consecutive pNnzTab bytes (type-punned 32-bit load)
+    uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
+    uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
+    uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
+
+    for (int i = 0; i < 3; i++) // OR of adjacent entries within pNnzTab[0..3]
+      uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
+    nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 1, 0);
+    nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 2, 1);
+    nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 3, 2);
+
+    for (int i = 0; i < 3; i++) // same for pNnzTab[4..7]
+      uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
+    nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 4);
+    nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 5);
+    nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 6);
+
+    for (int i = 0; i < 3; i++) // same for pNnzTab[8..11]
+      uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
+    nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 8);
+    nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 9);
+    nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 10);
+
+    for (int i = 0; i < 3; i++) // same for pNnzTab[12..15]
+      uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
+    nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 12);
+    nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 13);
+    nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 14);
+
+    // horizontal
+    * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); // byte-wise OR of pNnzTab[0..3] with pNnzTab[4..7], stored into all four uiBsx4 bytes
+    nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 4, 0);
+    nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 5, 1);
+    nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 6, 2);
+    nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 7, 3);
+
+    * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
+    nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 8, 4);
+    nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 9, 5);
+    nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 10, 6);
+    nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 11, 7);
+
+    * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
+    nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 12, 8);
+    nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 13, 9);
+    nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 14, 10);
+    nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex, pCurDqLayer->pMv[LIST_0][iMbXy], 15, 11);
+  }
+}
+
+void static inline DeblockingBSliceBSInsideMBNormal (PDqLayer pCurDqLayer, uint8_t nBS[2][4][4], int8_t* pNnzTab,
+    int32_t iMbXy) { // two-list counterpart of DeblockingBSInsideMBNormal: for every inner-edge segment it tries LIST_0, then LIST_1, and applies BS_EDGE on the first list where both blocks have a reference
+  uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
+  int8_t* iRefIndex[LIST_A];
+  iRefIndex[LIST_0] = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
+  iRefIndex[LIST_1] = pCurDqLayer->pRefIndex[LIST_1][iMbXy];
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
+
+  int8_t i8x8NnzTab[4];
+
+  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // 8x8 transform: only the middle edge (index 2) of each direction is written
+    for (int32_t i = 0; i < 4; i++) { // OR together the four pNnzTab entries selected by g_kuiMbCountScan4Idx[4*i .. 4*i+3]
+      int32_t iBlkIdx = i << 2;
+      i8x8NnzTab[i] = (pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 1]] |
+                       pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 2]] | pNnzTab[g_kuiMbCountScan4Idx[iBlkIdx + 3]]);
+    }
+    //vertical
+    int8_t iIndex = g_kuiMbCountScan4Idx[1 << 2];
+    int8_t iNeigborIndex = g_kuiMbCountScan4Idx[0];
+    nBS[0][2][0] = nBS[0][2][1] = 1; // value kept when no list has both blocks above REF_NOT_IN_LIST
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
+        nBS[0][2][0] = nBS[0][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[1]), iRefIndex[listIdx],
+                                               pCurDqLayer->pMv[listIdx][iMbXy],
+                                               iIndex, iNeigborIndex);
+        break; // the first qualifying list decides; the remaining list is not examined (same pattern for every segment below)
+      }
+    }
+    iIndex = g_kuiMbCountScan4Idx[3 << 2];
+    iNeigborIndex = g_kuiMbCountScan4Idx[2 << 2];
+    nBS[0][2][2] = nBS[0][2][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
+        nBS[0][2][2] = nBS[0][2][3] = BS_EDGE ((i8x8NnzTab[2] | i8x8NnzTab[3]), iRefIndex[listIdx],
+                                               pCurDqLayer->pMv[listIdx][iMbXy],
+                                               iIndex, iNeigborIndex);
+        break;
+      }
+    }
+
+    //horizontal
+    iIndex = g_kuiMbCountScan4Idx[2 << 2];
+    iNeigborIndex = g_kuiMbCountScan4Idx[0];
+    nBS[1][2][0] = nBS[1][2][1] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
+        nBS[1][2][0] = nBS[1][2][1] = BS_EDGE ((i8x8NnzTab[0] | i8x8NnzTab[2]), iRefIndex[listIdx],
+                                               pCurDqLayer->pMv[listIdx][iMbXy],
+                                               iIndex, iNeigborIndex);
+        break;
+      }
+    }
+
+    iIndex = g_kuiMbCountScan4Idx[3 << 2];
+    iNeigborIndex = g_kuiMbCountScan4Idx[1 << 2];
+    nBS[1][2][2] = nBS[1][2][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][iIndex] > REF_NOT_IN_LIST && iRefIndex[listIdx][iNeigborIndex] > REF_NOT_IN_LIST) {
+        nBS[1][2][2] = nBS[1][2][3] = BS_EDGE ((i8x8NnzTab[1] | i8x8NnzTab[3]), iRefIndex[listIdx],
+                                               pCurDqLayer->pMv[listIdx][iMbXy],
+                                               iIndex, iNeigborIndex);
+        break;
+      }
+    }
+  } else { // otherwise: all three inner edges (1..3) in both directions, one segment at a time
+    uiNnz32b0 = * (uint32_t*) (pNnzTab + 0); // each word packs four consecutive pNnzTab bytes (type-punned 32-bit load)
+    uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
+    uiNnz32b2 = * (uint32_t*) (pNnzTab + 8);
+    uiNnz32b3 = * (uint32_t*) (pNnzTab + 12);
+
+    for (int i = 0; i < 3; i++) // OR of adjacent entries within pNnzTab[0..3]
+      uiBsx4[i] = pNnzTab[i] | pNnzTab[i + 1];
+    nBS[0][1][0] = 1; // default when neither list has references for both blocks
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][1] > REF_NOT_IN_LIST && iRefIndex[listIdx][0] > REF_NOT_IN_LIST) {
+        nBS[0][1][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 1, 0);
+        break;
+      }
+    }
+    nBS[0][2][0] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][2] > REF_NOT_IN_LIST && iRefIndex[listIdx][1] > REF_NOT_IN_LIST) {
+        nBS[0][2][0] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 2, 1);
+        break;
+      }
+    }
+    nBS[0][3][0] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][3] > REF_NOT_IN_LIST && iRefIndex[listIdx][2] > REF_NOT_IN_LIST) {
+        nBS[0][3][0] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 3, 2);
+        break;
+      }
+    }
+
+    for (int i = 0; i < 3; i++) // same for pNnzTab[4..7]
+      uiBsx4[i] = pNnzTab[4 + i] | pNnzTab[4 + i + 1];
+    nBS[0][1][1] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][5] > REF_NOT_IN_LIST && iRefIndex[listIdx][4] > REF_NOT_IN_LIST) {
+        nBS[0][1][1] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 4);
+        break;
+      }
+    }
+    nBS[0][2][1] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][6] > REF_NOT_IN_LIST && iRefIndex[listIdx][5] > REF_NOT_IN_LIST) {
+        nBS[0][2][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 5);
+        break;
+      }
+    }
+    nBS[0][3][1] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][7] > REF_NOT_IN_LIST && iRefIndex[listIdx][6] > REF_NOT_IN_LIST) {
+        nBS[0][3][1] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 6);
+        break;
+      }
+    }
+
+    for (int i = 0; i < 3; i++) // same for pNnzTab[8..11]
+      uiBsx4[i] = pNnzTab[8 + i] | pNnzTab[8 + i + 1];
+    nBS[0][1][2] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][9] > REF_NOT_IN_LIST && iRefIndex[listIdx][8] > REF_NOT_IN_LIST) {
+        nBS[0][1][2] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 8);
+        break;
+      }
+    }
+    nBS[0][2][2] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][10] > REF_NOT_IN_LIST && iRefIndex[listIdx][9] > REF_NOT_IN_LIST) {
+        nBS[0][2][2] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 9);
+        break;
+      }
+    }
+    nBS[0][3][2] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][11] > REF_NOT_IN_LIST && iRefIndex[listIdx][10] > REF_NOT_IN_LIST) {
+        nBS[0][3][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 10);
+        break;
+      }
+    }
+
+    for (int i = 0; i < 3; i++) // same for pNnzTab[12..15]
+      uiBsx4[i] = pNnzTab[12 + i] | pNnzTab[12 + i + 1];
+    nBS[0][1][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][13] > REF_NOT_IN_LIST && iRefIndex[listIdx][12] > REF_NOT_IN_LIST) {
+        nBS[0][1][3] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 12);
+        break;
+      }
+    }
+    nBS[0][2][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][14] > REF_NOT_IN_LIST && iRefIndex[listIdx][13] > REF_NOT_IN_LIST) {
+        nBS[0][2][3] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 13);
+        break;
+      }
+    }
+    nBS[0][3][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][15] > REF_NOT_IN_LIST && iRefIndex[listIdx][14] > REF_NOT_IN_LIST) {
+        nBS[0][3][3] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 14);
+        break;
+      }
+    }
+
+    // horizontal
+    * (uint32_t*)uiBsx4 = (uiNnz32b0 | uiNnz32b1); // byte-wise OR of pNnzTab[0..3] with pNnzTab[4..7], stored into all four uiBsx4 bytes
+    nBS[1][1][0] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][4] > REF_NOT_IN_LIST && iRefIndex[listIdx][0] > REF_NOT_IN_LIST) {
+        nBS[1][1][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 4, 0);
+        break;
+      }
+    }
+    nBS[1][1][1] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][5] > REF_NOT_IN_LIST && iRefIndex[listIdx][1] > REF_NOT_IN_LIST) {
+        nBS[1][1][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 5, 1);
+        break;
+      }
+    }
+    nBS[1][1][2] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][6] > REF_NOT_IN_LIST && iRefIndex[listIdx][2] > REF_NOT_IN_LIST) {
+        nBS[1][1][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 6, 2);
+        break;
+      }
+    }
+    nBS[1][1][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][7] > REF_NOT_IN_LIST && iRefIndex[listIdx][3] > REF_NOT_IN_LIST) {
+        nBS[1][1][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 7, 3);
+        break;
+      }
+    }
+
+    * (uint32_t*)uiBsx4 = (uiNnz32b1 | uiNnz32b2);
+    nBS[1][2][0] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][8] > REF_NOT_IN_LIST && iRefIndex[listIdx][4] > REF_NOT_IN_LIST) {
+        nBS[1][2][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 8, 4);
+        break;
+      }
+    }
+    nBS[1][2][1] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][9] > REF_NOT_IN_LIST && iRefIndex[listIdx][5] > REF_NOT_IN_LIST) {
+        nBS[1][2][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 9, 5);
+        break;
+      }
+    }
+    nBS[1][2][2] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][10] > REF_NOT_IN_LIST && iRefIndex[listIdx][6] > REF_NOT_IN_LIST) {
+        nBS[1][2][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 10, 6);
+        break;
+      }
+    }
+    nBS[1][2][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][11] > REF_NOT_IN_LIST && iRefIndex[listIdx][7] > REF_NOT_IN_LIST) {
+        nBS[1][2][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 11, 7);
+        break;
+      }
+    }
+
+    * (uint32_t*)uiBsx4 = (uiNnz32b2 | uiNnz32b3);
+    nBS[1][3][0] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][12] > REF_NOT_IN_LIST && iRefIndex[listIdx][8] > REF_NOT_IN_LIST) {
+        nBS[1][3][0] = BS_EDGE (uiBsx4[0], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 12, 8);
+        break;
+      }
+    }
+    nBS[1][3][1] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][13] > REF_NOT_IN_LIST && iRefIndex[listIdx][9] > REF_NOT_IN_LIST) {
+        nBS[1][3][1] = BS_EDGE (uiBsx4[1], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 13, 9);
+        break;
+      }
+    }
+    nBS[1][3][2] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][14] > REF_NOT_IN_LIST && iRefIndex[listIdx][10] > REF_NOT_IN_LIST) {
+        nBS[1][3][2] = BS_EDGE (uiBsx4[2], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 14, 10);
+        break;
+      }
+    }
+    nBS[1][3][3] = 1;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (iRefIndex[listIdx][15] > REF_NOT_IN_LIST && iRefIndex[listIdx][11] > REF_NOT_IN_LIST) {
+        nBS[1][3][3] = BS_EDGE (uiBsx4[3], iRefIndex[listIdx], pCurDqLayer->pMv[listIdx][iMbXy], 15, 11);
+        break;
+      }
+    }
+  }
+}
+
+
+uint32_t DeblockingBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
+  int32_t i, j;
+  uint32_t uiBSx4; // four one-byte boundary strengths, returned packed in one word
+  uint8_t* pBS = (uint8_t*) (&uiBSx4); // byte view used to fill uiBSx4 entry by entry
+  const uint8_t* pBIdx      = &g_kuiTableBIdx[iEdge][0]; // block indices used with the current MB (iMbXy)
+  const uint8_t* pBnIdx     = &g_kuiTableBIdx[iEdge][4]; // block indices used with the neighbour MB (iNeighMb)
+  const uint8_t* pB8x8Idx   = &g_kuiTableB8x8Idx[iEdge][0]; // current-MB indices, consumed 4 at a time in the 8x8 paths
+  const uint8_t* pBn8x8Idx  = &g_kuiTableB8x8Idx[iEdge][8]; // neighbour-MB indices, consumed 4 at a time in the 8x8 paths
+  // Strength per entry: 2 when either side has a non-zero pNzc value, otherwise the MB_BS_MV result on LIST_0 data.
+  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
+    for (i = 0; i < 2; i++) { // both MBs flagged 8x8: one decision per half edge, stored in two bytes
+      uint8_t uiNzc = 0;
+      for (j = 0; uiNzc == 0 && j < 4; j++) { // stops at the first non-zero entry
+        uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
+      }
+      if (uiNzc) {
+        pBS[i << 1] = pBS[1 + (i << 1)] = 2;
+      } else {
+        pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb,
+                                          *pB8x8Idx, *pBn8x8Idx);
+      }
+      pB8x8Idx += 4; // advance both tables to the group for the second half of the edge
+      pBn8x8Idx += 4;
+    }
+  } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // only the current MB is flagged 8x8
+    for (i = 0; i < 2; i++) {
+      uint8_t uiNzc = 0;
+      for (j = 0; uiNzc == 0 && j < 4; j++) { // current-MB side is summarised over a group of 4 indices
+        uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
+      }
+      for (j = 0; j < 2; j++) { // neighbour side is examined entry by entry
+        if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+          pBS[j + (i << 1)] = 2;
+        } else {
+          pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pB8x8Idx,
+                                        *pBnIdx);
+        }
+        pBnIdx++;
+      }
+      pB8x8Idx += 4;
+    }
+  } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { // only the neighbour MB is flagged 8x8
+    for (i = 0; i < 2; i++) {
+      uint8_t uiNzc = 0;
+      for (j = 0; uiNzc == 0 && j < 4; j++) { // neighbour side is summarised over a group of 4 indices
+        uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
+      }
+      for (j = 0; j < 2; j++) { // current-MB side is examined entry by entry
+        if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
+          pBS[j + (i << 1)] = 2;
+        } else {
+          pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
+                                        *pBn8x8Idx);
+        }
+        pBIdx++;
+      }
+      pBn8x8Idx += 4;
+    }
+  } else {
+    // only 4x4 transform
+    for (i = 0; i < 4; i++) { // four independent entries along the edge
+      if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+        pBS[i] = 2;
+      } else {
+        pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[LIST_0], pCurDqLayer->pMv[LIST_0], iMbXy, iNeighMb, *pBIdx,
+                           *pBnIdx);
+      }
+      pBIdx++;
+      pBnIdx++;
+    }
+  }
+  // every path above has written all four bytes of uiBSx4
+  return uiBSx4;
+}
+uint32_t DeblockingBSliceBsMarginalMBAvcbase (PDqLayer pCurDqLayer, int32_t iEdge, int32_t iNeighMb, int32_t iMbXy) {
+  int32_t i, j;
+  uint32_t uiBSx4; // four one-byte boundary strengths, returned packed in one word
+  uint8_t* pBS = (uint8_t*) (&uiBSx4); // byte view used to fill uiBSx4 entry by entry
+  const uint8_t* pBIdx = &g_kuiTableBIdx[iEdge][0]; // block indices used with the current MB (iMbXy)
+  const uint8_t* pBnIdx = &g_kuiTableBIdx[iEdge][4]; // block indices used with the neighbour MB (iNeighMb)
+  const uint8_t* pB8x8Idx = &g_kuiTableB8x8Idx[iEdge][0]; // current-MB indices, consumed 4 at a time in the 8x8 paths
+  const uint8_t* pBn8x8Idx = &g_kuiTableB8x8Idx[iEdge][8]; // neighbour-MB indices, consumed 4 at a time in the 8x8 paths
+  // B-slice variant: 2 on non-zero pNzc; else MB_BS_MV on the first list both blocks reference (neighbour looked up in iNeighMb); else 1.
+  if (pCurDqLayer->pTransformSize8x8Flag[iMbXy] && pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) {
+    for (i = 0; i < 2; i++) { // both MBs flagged 8x8: one decision per half edge, stored in two bytes
+      uint8_t uiNzc = 0;
+      for (j = 0; uiNzc == 0 && j < 4; j++) { // stops at the first non-zero entry
+        uiNzc |= (pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)] | pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)]);
+      }
+      if (uiNzc) {
+        pBS[i << 1] = pBS[1 + (i << 1)] = 2;
+      } else {
+        pBS[i << 1] = pBS[1 + (i << 1)] = 1; // kept when no list is referenced by both blocks
+        for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+          if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
+              && pCurDqLayer->pRefIndex[listIdx][iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) { // neighbour block lives in iNeighMb
+            pBS[i << 1] = pBS[1 + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb,
+                                              *pB8x8Idx, *pBn8x8Idx);
+            break;
+          }
+        }
+      }
+      pB8x8Idx += 4; // advance both tables to the group for the second half of the edge
+      pBn8x8Idx += 4;
+    }
+  } else if (pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // only the current MB is flagged 8x8
+    for (i = 0; i < 2; i++) {
+      uint8_t uiNzc = 0;
+      for (j = 0; uiNzc == 0 && j < 4; j++) { // current-MB side is summarised over a group of 4 indices
+        uiNzc |= pCurDqLayer->pNzc[iMbXy][* (pB8x8Idx + j)];
+      }
+      for (j = 0; j < 2; j++) { // neighbour side is examined entry by entry
+        if (uiNzc | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+          pBS[j + (i << 1)] = 2;
+        } else {
+          pBS[j + (i << 1)] = 1; // kept when no list is referenced by both blocks
+          for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+            if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pB8x8Idx] > REF_NOT_IN_LIST
+                && pCurDqLayer->pRefIndex[listIdx][iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) { // neighbour block lives in iNeighMb
+              pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pB8x8Idx,
+                                            *pBnIdx);
+              break;
+            }
+          }
+        }
+        pBnIdx++;
+      }
+      pB8x8Idx += 4;
+    }
+  } else if (pCurDqLayer->pTransformSize8x8Flag[iNeighMb]) { // only the neighbour MB is flagged 8x8
+    for (i = 0; i < 2; i++) {
+      uint8_t uiNzc = 0;
+      for (j = 0; uiNzc == 0 && j < 4; j++) { // neighbour side is summarised over a group of 4 indices
+        uiNzc |= pCurDqLayer->pNzc[iNeighMb][* (pBn8x8Idx + j)];
+      }
+      for (j = 0; j < 2; j++) { // current-MB side is examined entry by entry
+        if (uiNzc | pCurDqLayer->pNzc[iMbXy][*pBIdx]) {
+          pBS[j + (i << 1)] = 2;
+        } else {
+          pBS[j + (i << 1)] = 1; // kept when no list is referenced by both blocks
+          for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+            if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
+                && pCurDqLayer->pRefIndex[listIdx][iNeighMb][*pBn8x8Idx] > REF_NOT_IN_LIST) { // neighbour block lives in iNeighMb
+              pBS[j + (i << 1)] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx,
+                                            *pBn8x8Idx);
+              break;
+            }
+          }
+        }
+        pBIdx++;
+      }
+      pBn8x8Idx += 4;
+    }
+  } else {
+    // only 4x4 transform
+    for (i = 0; i < 4; i++) { // four independent entries along the edge
+      if (pCurDqLayer->pNzc[iMbXy][*pBIdx] | pCurDqLayer->pNzc[iNeighMb][*pBnIdx]) {
+        pBS[i] = 2;
+      } else {
+        pBS[i] = 1; // kept when no list is referenced by both blocks
+        for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+          if (pCurDqLayer->pRefIndex[listIdx][iMbXy][*pBIdx] > REF_NOT_IN_LIST
+              && pCurDqLayer->pRefIndex[listIdx][iNeighMb][*pBnIdx] > REF_NOT_IN_LIST) { // neighbour block lives in iNeighMb
+            pBS[i] = MB_BS_MV (pCurDqLayer->pRefIndex[listIdx], pCurDqLayer->pMv[listIdx], iMbXy, iNeighMb, *pBIdx, *pBnIdx);
+            break;
+          }
+        }
+      }
+      pBIdx++;
+      pBnIdx++;
+    }
+  }
+  // every path above has written all four bytes of uiBSx4
+  return uiBSx4;
+}
+int32_t DeblockingAvailableNoInterlayer (PDqLayer pCurDqLayer, int32_t iFilterIdc) {
+  // Builds the boundary flag word for the current MB: one bit for the left
+  // MB edge (LEFT_FLAG_BIT) and one for the top MB edge (TOP_FLAG_BIT).
+  const int32_t kiMbXy = pCurDqLayer->iMbXyIndex;
+  // An MB in column 0 has no left neighbour; an MB in row 0 has no top neighbour.
+  bool bLeftFlag = (pCurDqLayer->iMbX > 0);
+  bool bTopFlag  = (pCurDqLayer->iMbY > 0);
+
+  if (2 == iFilterIdc) {
+    // idc 2 additionally requires the neighbour to carry the same pSliceIdc value
+    bLeftFlag = bLeftFlag && (pCurDqLayer->pSliceIdc[kiMbXy] == pCurDqLayer->pSliceIdc[kiMbXy - 1]);
+    bTopFlag  = bTopFlag && (pCurDqLayer->pSliceIdc[kiMbXy] == pCurDqLayer->pSliceIdc[kiMbXy - pCurDqLayer->iMbWidth]);
+  }
+
+  return (bLeftFlag << LEFT_FLAG_BIT) | (bTopFlag << TOP_FLAG_BIT);
+}
+
+void FilteringEdgeLumaH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
+  int32_t iIndexA;
+  int32_t iAlpha;
+  int32_t iBeta;
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); // tc buffer passed to the LT4 kernel below
+  // Luma filter for one edge with per-segment strengths pBS; dispatches to the LT4 "Ver" kernel.
+  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                          iBeta);
+  // Nothing is filtered when both thresholds are zero.
+  if (iAlpha | iBeta) {
+    TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0); // presumably fills tc from iIndexA and pBS; macro body is not visible here
+    pFilter->pLoopf->pfLumaDeblockingLT4Ver (pPix, iStride, iAlpha, iBeta, tc);
+  }
+  return;
+}
+
+
+void FilteringEdgeLumaV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
+  int32_t  iIndexA;
+  int32_t  iAlpha;
+  int32_t  iBeta;
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); // tc buffer passed to the LT4 kernel below
+  // Luma filter for one edge with per-segment strengths pBS; dispatches to the LT4 "Hor" kernel.
+  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                          iBeta);
+  // Nothing is filtered when both thresholds are zero.
+  if (iAlpha | iBeta) {
+    TC0_TBL_LOOKUP (tc, iIndexA, pBS, 0); // presumably fills tc from iIndexA and pBS; macro body is not visible here
+    pFilter->pLoopf->pfLumaDeblockingLT4Hor (pPix, iStride, iAlpha, iBeta, tc);
+  }
+  return;
+}
+
+
+void FilteringEdgeLumaIntraH (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
+  // Luma edge filter using the EQ4 "Ver" kernel; pBS is not read by this function.
+  int32_t iIndexA, iAlpha, iBeta;
+
+  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                          iBeta);
+
+  if (0 == (iAlpha | iBeta)) {
+    return; // both thresholds are zero: leave the edge untouched
+  }
+
+  pFilter->pLoopf->pfLumaDeblockingEQ4Ver (pPix, iStride, iAlpha, iBeta);
+}
+
+void FilteringEdgeLumaIntraV (SDeblockingFilter* pFilter, uint8_t* pPix, int32_t iStride, uint8_t* pBS) {
+  // Luma edge filter using the EQ4 "Hor" kernel; pBS is not read by this function.
+  int32_t iIndexA, iAlpha, iBeta;
+
+  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                          iBeta);
+
+  if (0 == (iAlpha | iBeta)) {
+    return; // both thresholds are zero: leave the edge untouched
+  }
+
+  pFilter->pLoopf->pfLumaDeblockingEQ4Hor (pPix, iStride, iAlpha, iBeta);
+}
+void FilteringEdgeChromaH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+                           uint8_t* pBS) { // chroma edge filter with strengths pBS, LT4 "Ver" kernels
+  int32_t iIndexA;
+  int32_t iAlpha;
+  int32_t iBeta;
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); // tc buffer passed to the LT4 kernels below
+  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
+    // Same QP for both planes: one threshold lookup and one kernel call that takes both planes.
+    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                            iBeta);
+
+    if (iAlpha | iBeta) {
+      TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
+      pFilter->pLoopf->pfChromaDeblockingLT4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
+    }
+  } else {
+    // Different QPs: each plane gets its own thresholds and a single-plane ("2") kernel call.
+    for (int i = 0; i < 2; i++) {
+
+
+      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                              iBeta);
+
+      if (iAlpha | iBeta) {
+        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr; // i == 0 selects Cb, i == 1 selects Cr
+        TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
+        pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
+      }
+
+
+
+    }
+
+  }
+  return;
+}
+void FilteringEdgeChromaV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+                           uint8_t* pBS) { // chroma edge filter with strengths pBS, LT4 "Hor" kernels
+  int32_t iIndexA;
+  int32_t iAlpha;
+  int32_t iBeta;
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16); // tc buffer passed to the LT4 kernels below
+  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
+    // Same QP for both planes: one threshold lookup and one kernel call that takes both planes.
+
+    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                            iBeta);
+
+    if (iAlpha | iBeta) {
+      TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
+      pFilter->pLoopf->pfChromaDeblockingLT4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta, tc);
+    }
+
+
+  } else {
+    // Different QPs: each plane gets its own thresholds and a single-plane ("2") kernel call.
+    for (int i = 0; i < 2; i++) {
+
+      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                              iBeta);
+
+      if (iAlpha | iBeta) {
+        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr; // i == 0 selects Cb, i == 1 selects Cr
+        TC0_TBL_LOOKUP (tc, iIndexA, pBS, 1);
+        pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pPixCbCr, iStride, iAlpha, iBeta, tc);
+      }
+
+
+    }
+  }
+  return;
+}
+
+void FilteringEdgeChromaIntraH (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+                                uint8_t* pBS) { // chroma edge filter, EQ4 "Ver" kernels; pBS is not read
+  int32_t iIndexA;
+  int32_t iAlpha;
+  int32_t iBeta;
+  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) {
+    // Same QP for both planes: one threshold lookup and one kernel call that takes both planes.
+    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                            iBeta);
+
+    if (iAlpha | iBeta) {
+      pFilter->pLoopf->pfChromaDeblockingEQ4Ver (pPixCb, pPixCr, iStride, iAlpha, iBeta);
+    }
+  } else {
+    // Different QPs: each plane gets its own thresholds and a single-plane ("2") kernel call.
+    for (int i = 0; i < 2; i++) {
+
+      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                              iBeta);
+
+      if (iAlpha | iBeta) {
+        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr; // i == 0 selects Cb, i == 1 selects Cr
+        pFilter->pLoopf->pfChromaDeblockingEQ4Ver2 (pPixCbCr, iStride, iAlpha, iBeta);
+      }
+
+    }
+  }
+  return;
+}
+
+void FilteringEdgeChromaIntraV (SDeblockingFilter* pFilter, uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride,
+                                uint8_t* pBS) { // chroma edge filter, EQ4 "Hor" kernels; pBS is not read
+  int32_t iIndexA;
+  int32_t iAlpha;
+  int32_t iBeta;
+  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { // QP of cb and cr are the same
+    // One threshold lookup and one kernel call that takes both planes.
+
+
+
+    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                            iBeta);
+    if (iAlpha | iBeta) {
+      pFilter->pLoopf->pfChromaDeblockingEQ4Hor (pPixCb, pPixCr, iStride, iAlpha, iBeta);
+    }
+  } else {
+    // Different QPs: each plane gets its own thresholds and a single-plane ("2") kernel call.
+    for (int i = 0; i < 2; i++) {
+
+
+      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                              iBeta);
+      if (iAlpha | iBeta) {
+        uint8_t* pPixCbCr = (i == 0) ? pPixCb : pPixCr; // i == 0 selects Cb, i == 1 selects Cr
+        pFilter->pLoopf->pfChromaDeblockingEQ4Hor2 (pPixCbCr, iStride, iAlpha, iBeta);
+      }
+    }
+
+  }
+  return;
+}
+
+
+void DeblockingInterMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, uint8_t nBS[2][4][4],
+                        int32_t iBoundryFlag) { // filters all edges of the current MB using the strengths in nBS
+  int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
+  int32_t iMbX = pCurDqLayer->iMbX;
+  int32_t iMbY = pCurDqLayer->iMbY;
+
+  int32_t iCurLumaQp = pCurDqLayer->pLumaQp[iMbXyIndex];
+  // chroma QP pair of the current MB; element i is applied to Cb for i == 0 and Cr for i == 1 by the chroma helpers
+  int8_t* pCurChromaQp = pCurDqLayer->pChromaQp[iMbXyIndex];
+  int32_t iLineSize   = pFilter->iCsStride[0];
+  int32_t iLineSizeUV = pFilter->iCsStride[1];
+
+  uint8_t* pDestY, * pDestCb, * pDestCr;
+  pDestY  = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4); // top-left luma sample of this MB (16x16)
+  pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSizeUV + iMbX) << 3); // top-left chroma sample of this MB (8x8)
+  pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSizeUV + iMbX) << 3);
+
+//Vertical margin: the left MB edge uses the rounded average of the current and left MB QPs
+  if (iBoundryFlag & LEFT_FLAG_MASK) {
+    int32_t iLeftXyIndex = iMbXyIndex - 1;
+    pFilter->iLumaQP   = (iCurLumaQp + pCurDqLayer->pLumaQp[iLeftXyIndex] + 1) >> 1;
+    for (int i = 0; i < 2; i++) {
+      pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iLeftXyIndex][i] + 1) >> 1;
+    }
+    if (nBS[0][0][0] == 0x04) { // strength 4 selects the intra (EQ4) filters; only the first entry is tested
+      FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
+      FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
+    } else {
+      if (* (uint32_t*)nBS[0][0] != 0) { // skip the edge when all four strengths are zero
+        FilteringEdgeLumaV (pFilter, pDestY, iLineSize, nBS[0][0]);
+        FilteringEdgeChromaV (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[0][0]);
+      }
+    }
+  }
+  // Inner vertical edges use the current MB's own QPs.
+  pFilter->iLumaQP = iCurLumaQp;
+  pFilter->iChromaQP[0] = pCurChromaQp[0];
+  pFilter->iChromaQP[1] = pCurChromaQp[1];
+  // Edges 1 and 3 are skipped when the MB is flagged as using the 8x8 transform.
+  if (* (uint32_t*)nBS[0][1] != 0 && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+    FilteringEdgeLumaV (pFilter, &pDestY[1 << 2], iLineSize, nBS[0][1]);
+  }
+  // Edge 2 (luma column 8, chroma column 4) is the only inner edge that also filters chroma.
+  if (* (uint32_t*)nBS[0][2] != 0) {
+    FilteringEdgeLumaV (pFilter, &pDestY[2 << 2], iLineSize, nBS[0][2]);
+    FilteringEdgeChromaV (pFilter, &pDestCb[2 << 1], &pDestCr[2 << 1], iLineSizeUV, nBS[0][2]);
+  }
+
+  if (* (uint32_t*)nBS[0][3] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+    FilteringEdgeLumaV (pFilter, &pDestY[3 << 2], iLineSize, nBS[0][3]);
+  }
+  // Top MB edge: rounded average of the current and top MB QPs.
+  if (iBoundryFlag & TOP_FLAG_MASK) {
+    int32_t iTopXyIndex = iMbXyIndex - pCurDqLayer->iMbWidth;
+    pFilter->iLumaQP = (iCurLumaQp + pCurDqLayer->pLumaQp[iTopXyIndex] + 1) >> 1;
+    for (int i = 0; i < 2; i++) {
+      pFilter->iChromaQP[i] = (pCurChromaQp[i] + pCurDqLayer->pChromaQp[iTopXyIndex][i] + 1) >> 1;
+    }
+
+    if (nBS[1][0][0] == 0x04) { // strength 4 selects the intra (EQ4) filters; only the first entry is tested
+      FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
+      FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSizeUV, NULL);
+    } else {
+      if (* (uint32_t*)nBS[1][0] != 0) { // skip the edge when all four strengths are zero
+        FilteringEdgeLumaH (pFilter, pDestY, iLineSize, nBS[1][0]);
+        FilteringEdgeChromaH (pFilter, pDestCb, pDestCr, iLineSizeUV, nBS[1][0]);
+      }
+    }
+  }
+  // Inner horizontal edges use the current MB's own QPs.
+  pFilter->iLumaQP = iCurLumaQp;
+  pFilter->iChromaQP[0] = pCurChromaQp[0];
+  pFilter->iChromaQP[1] = pCurChromaQp[1];
+  // Same pattern as the vertical pass, with offsets scaled by the line stride.
+  if (* (uint32_t*)nBS[1][1] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+    FilteringEdgeLumaH (pFilter, &pDestY[ (1 << 2)*iLineSize], iLineSize, nBS[1][1]);
+  }
+
+  if (* (uint32_t*)nBS[1][2] != 0) {
+    FilteringEdgeLumaH (pFilter, &pDestY[ (2 << 2)*iLineSize], iLineSize, nBS[1][2]);
+    FilteringEdgeChromaH (pFilter, &pDestCb[ (2 << 1)*iLineSizeUV], &pDestCr[ (2 << 1)*iLineSizeUV], iLineSizeUV,
+                          nBS[1][2]);
+  }
+
+  if (* (uint32_t*)nBS[1][3] != 0  && !pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+    FilteringEdgeLumaH (pFilter, &pDestY[ (3 << 2)*iLineSize], iLineSize, nBS[1][3]);
+  }
+}
+
+void FilteringEdgeLumaHV (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
+  int32_t iMbXyIndex = pCurDqLayer->iMbXyIndex;
+  int32_t iMbX      = pCurDqLayer->iMbX;
+  int32_t iMbY      = pCurDqLayer->iMbY;
+  int32_t iMbWidth  = pCurDqLayer->iMbWidth;
+  int32_t iLineSize  = pFilter->iCsStride[0];
+  // Luma deblocking of one MB (called from DeblockingIntraMb): MB edges use the EQ4 filters, inner edges use strength 3.
+  uint8_t*  pDestY;
+  int32_t  iCurQp;
+  int32_t  iIndexA, iAlpha, iBeta;
+
+  ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
+
+  pDestY  = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4); // top-left luma sample of this MB
+  iCurQp  = pCurDqLayer->pLumaQp[iMbXyIndex];
+
+  * (uint32_t*)uiBSx4 = 0x03030303; // all four strength bytes set to 3 for the inner edges
+
+  // luma v
+  if (iBoundryFlag & LEFT_FLAG_MASK) {
+    pFilter->iLumaQP   = (iCurQp   + pCurDqLayer->pLumaQp[iMbXyIndex - 1] + 1) >> 1; // rounded average with the left MB
+    FilteringEdgeLumaIntraV (pFilter, pDestY, iLineSize, NULL);
+  }
+
+  pFilter->iLumaQP   = iCurQp;
+  GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                          iBeta);
+  if (iAlpha | iBeta) {
+    TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 0);
+    // iTc is looked up once here and reused by the horizontal-edge pass further down.
+    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+      pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[1 << 2], iLineSize, iAlpha, iBeta, iTc);
+    }
+
+    pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[2 << 2], iLineSize, iAlpha, iBeta, iTc);
+
+    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+      pFilter->pLoopf->pfLumaDeblockingLT4Hor (&pDestY[3 << 2], iLineSize, iAlpha, iBeta, iTc);
+    }
+  }
+
+  // luma h
+  if (iBoundryFlag & TOP_FLAG_MASK) {
+    pFilter->iLumaQP   = (iCurQp   + pCurDqLayer->pLumaQp[iMbXyIndex - iMbWidth] + 1) >> 1; // rounded average with the top MB
+    FilteringEdgeLumaIntraH (pFilter, pDestY, iLineSize, NULL);
+  }
+
+  pFilter->iLumaQP   = iCurQp;
+  if (iAlpha | iBeta) { // iAlpha, iBeta and iTc still hold the values computed for iCurQp above
+    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+      pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (1 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
+    }
+
+    pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (2 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
+
+    if (!pCurDqLayer->pTransformSize8x8Flag[iMbXyIndex]) {
+      pFilter->pLoopf->pfLumaDeblockingLT4Ver (&pDestY[ (3 << 2)*iLineSize], iLineSize, iAlpha, iBeta, iTc);
+    }
+  }
+}
+void FilteringEdgeChromaHV (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
+  int32_t iMbXyIndex     = pCurDqLayer->iMbXyIndex;
+  int32_t iMbX      = pCurDqLayer->iMbX;
+  int32_t iMbY      = pCurDqLayer->iMbY;
+  int32_t iMbWidth  = pCurDqLayer->iMbWidth;
+  int32_t iLineSize  = pFilter->iCsStride[1];
+  // Chroma deblocking of one MB (called from DeblockingIntraMb): MB edges use the EQ4 filters, the middle edge uses strength 3.
+  uint8_t* pDestCb;
+  uint8_t* pDestCr;
+  // chroma QP pair of the current MB: [0] is applied to Cb, [1] to Cr
+  int8_t* pCurQp;
+  int32_t  iIndexA, iAlpha, iBeta;
+
+  ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
+
+  pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3); // top-left chroma sample of this MB
+  pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3);
+  pCurQp  = pCurDqLayer->pChromaQp[iMbXyIndex];
+
+  * (uint32_t*)uiBSx4 = 0x03030303; // all four strength bytes set to 3 for the inner edge
+
+
+// chroma v
+  if (iBoundryFlag & LEFT_FLAG_MASK) {
+    // left MB edge: rounded average of the current and left MB chroma QPs
+    for (int i = 0; i < 2; i++) {
+      pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - 1][i] + 1) >> 1;
+
+    }
+    FilteringEdgeChromaIntraV (pFilter, pDestCb, pDestCr, iLineSize, NULL);
+  }
+
+  pFilter->iChromaQP[0]   = pCurQp[0];
+  pFilter->iChromaQP[1]   = pCurQp[1];
+  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { // same QP: one kernel call filters both planes
+    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                            iBeta);
+    if (iAlpha | iBeta) {
+      TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
+      pFilter->pLoopf->pfChromaDeblockingLT4Hor (&pDestCb[2 << 1], &pDestCr[2 << 1], iLineSize, iAlpha, iBeta, iTc);
+    }
+  } else {
+    // different QPs: thresholds and kernel call per plane
+    for (int i = 0; i < 2; i++) {
+      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                              iBeta);
+      if (iAlpha | iBeta) {
+        uint8_t* pDestCbCr = (i == 0) ? &pDestCb[2 << 1] : &pDestCr[2 << 1];
+        TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
+        pFilter->pLoopf->pfChromaDeblockingLT4Hor2 (pDestCbCr, iLineSize, iAlpha, iBeta, iTc);
+      }
+
+    }
+  }
+
+  // chroma h
+  // top MB edge: rounded average of the current and top MB chroma QPs
+  if (iBoundryFlag & TOP_FLAG_MASK) {
+    for (int i = 0; i < 2; i++) {
+      pFilter->iChromaQP[i] = (pCurQp[i] + pCurDqLayer->pChromaQp[iMbXyIndex - iMbWidth][i] + 1) >> 1;
+    }
+    FilteringEdgeChromaIntraH (pFilter, pDestCb, pDestCr, iLineSize, NULL);
+  }
+
+  pFilter->iChromaQP[0]   = pCurQp[0];
+  pFilter->iChromaQP[1]   = pCurQp[1];
+
+  if (pFilter->iChromaQP[0] == pFilter->iChromaQP[1]) { // same QP: one kernel call filters both planes
+    GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[0], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                            iBeta);
+    if (iAlpha | iBeta) {
+      TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
+      pFilter->pLoopf->pfChromaDeblockingLT4Ver (&pDestCb[ (2 << 1)*iLineSize], &pDestCr[ (2 << 1)*iLineSize], iLineSize,
+          iAlpha, iBeta, iTc);
+    }
+  } else {
+    for (int i = 0; i < 2; i++) {
+      // different QPs: thresholds and kernel call per plane
+      GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP[i], pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
+                              iBeta);
+      if (iAlpha | iBeta) {
+        TC0_TBL_LOOKUP (iTc, iIndexA, uiBSx4, 1);
+        uint8_t* pDestCbCr = (i == 0) ? &pDestCb[ (2 << 1) * iLineSize] : &pDestCr[ (2 << 1) * iLineSize];
+        pFilter->pLoopf->pfChromaDeblockingLT4Ver2 (pDestCbCr, iLineSize,
+            iAlpha, iBeta, iTc);
+      }
+    }
+
+
+  }
+}
+
+// Intra MB deblocking: each helper handles both edge directions in one call (the luma helper shares one tc lookup between them).
+void DeblockingIntraMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
+  FilteringEdgeLumaHV (pCurDqLayer, pFilter, iBoundryFlag); // luma plane
+  FilteringEdgeChromaHV (pCurDqLayer, pFilter, iBoundryFlag); // Cb and Cr planes
+}
+
+void WelsDeblockingMb (PDqLayer pCurDqLayer, PDeblockingFilter  pFilter, int32_t iBoundryFlag) {
+  uint8_t nBS[2][4][4] = {{{ 0 }}}; // [0] is consumed by the vertical-edge pass, [1] by the horizontal-edge pass; 4 edges x 4 strengths
+  // Deblocks the current MB: intra MB types go to DeblockingIntraMb, all others derive nBS and go to DeblockingInterMb.
+  int32_t iMbXyIndex  = pCurDqLayer->iMbXyIndex;
+  uint32_t iCurMbType  = pCurDqLayer->pMbType[iMbXyIndex];
+  int32_t iMbNb;
+
+  PSlice pSlice = &pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  bool bBSlice = pSliceHeader->eSliceType == B_SLICE; // selects the B-slice strength helpers below
+
+  switch (iCurMbType) {
+  case MB_TYPE_INTRA4x4:
+  case MB_TYPE_INTRA8x8:
+  case MB_TYPE_INTRA16x16:
+  case MB_TYPE_INTRA_PCM:
+    DeblockingIntraMb (pCurDqLayer, pFilter, iBoundryFlag);
+    break;
+  default:
+    // Left MB edge (iEdge 0): strength 4 everywhere when the left MB is intra, else derived per entry.
+    if (iBoundryFlag & LEFT_FLAG_MASK) {
+      iMbNb = iMbXyIndex - 1;
+      if (bBSlice) {
+        * (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase (
+                                   pCurDqLayer, 0, iMbNb, iMbXyIndex);
+      } else {
+        * (uint32_t*)nBS[0][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
+                                   pCurDqLayer, 0, iMbNb, iMbXyIndex);
+      }
+    } else {
+      * (uint32_t*)nBS[0][0] = 0;
+    }
+    if (iBoundryFlag & TOP_FLAG_MASK) { // top MB edge (iEdge 1), same rule with the MB one row up
+      iMbNb = iMbXyIndex - pCurDqLayer->iMbWidth;
+      if (bBSlice) {
+        * (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBSliceBsMarginalMBAvcbase (
+                                   pCurDqLayer, 1, iMbNb, iMbXyIndex);
+      } else {
+        * (uint32_t*)nBS[1][0] = IS_INTRA (pCurDqLayer->pMbType[iMbNb]) ? 0x04040404 : DeblockingBsMarginalMBAvcbase (
+                                   pCurDqLayer, 1, iMbNb, iMbXyIndex);
+      }
+    } else {
+      * (uint32_t*)nBS[1][0] = 0;
+    }
+    //SKIP MB_16x16 or others
+    if (IS_SKIP (iCurMbType)) { // skipped MB: all inner-edge strengths are zero
+      * (uint32_t*)nBS[0][1] = * (uint32_t*)nBS[0][2] = * (uint32_t*)nBS[0][3] =
+                                 * (uint32_t*)nBS[1][1] = * (uint32_t*)nBS[1][2] = * (uint32_t*)nBS[1][3] = 0;
+    } else {
+      if (IS_INTER_16x16 (iCurMbType)) { // 16x16 inter: inner strengths come from pNzc alone
+        if (!pCurDqLayer->pTransformSize8x8Flag[pCurDqLayer->iMbXyIndex]) {
+          DeblockingBSInsideMBAvsbase (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+        } else {
+          DeblockingBSInsideMBAvsbase8x8 (pCurDqLayer->pNzc[iMbXyIndex], nBS, 1);
+        }
+      } else {
+        // other inter partitions: the helper also receives pCurDqLayer and the MB index
+        if (bBSlice) {
+          DeblockingBSliceBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
+        } else {
+          DeblockingBSInsideMBNormal (pCurDqLayer, nBS, pCurDqLayer->pNzc[iMbXyIndex], iMbXyIndex);
+        }
+      }
+    }
+    DeblockingInterMb (pCurDqLayer, pFilter, nBS, iBoundryFlag);
+    break;
+  }
+}
+
+/*!
+ * \brief   AVC slice deblocking filtering target layer
+ *
+ * \param   pCtx        Wels avc decoder context
+ * \param   pDeblockMb  callback invoked once for every macroblock of the slice
+ * \return  NONE
+ */
+void WelsDeblockingFilterSlice (PWelsDecoderContext pCtx, PDeblockingFilterMbFunc pDeblockMb) {
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  PSliceHeaderExt pSliceHeaderExt = &pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
+  int32_t iMbWidth  = pCurDqLayer->iMbWidth;
+  int32_t iTotalMbCount = pSliceHeaderExt->sSliceHeader.pSps->uiTotalMbCount;
+
+  SDeblockingFilter pFilter; // local filter state (a struct, despite the "p" prefix), zeroed before use
+  memset (&pFilter, 0, sizeof (pFilter));
+  PFmo pFmo = pCtx->pFmo;
+  int32_t iNextMbXyIndex = 0;
+  int32_t iTotalNumMb = pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice;
+  int32_t iCountNumMb = 0;
+  int32_t iBoundryFlag;
+  int32_t iFilterIdc = pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiDisableDeblockingFilterIdc;
+
+  /* Step1: parameters set */
+  pFilter.pCsData[0] = pCtx->pDec->pData[0];
+  pFilter.pCsData[1] = pCtx->pDec->pData[1];
+  pFilter.pCsData[2] = pCtx->pDec->pData[2];
+
+  pFilter.iCsStride[0] = pCtx->pDec->iLinesize[0];
+  pFilter.iCsStride[1] = pCtx->pDec->iLinesize[1];
+
+  pFilter.eSliceType = (EWelsSliceType) pCurDqLayer->sLayerInfo.sSliceInLayer.eSliceType;
+
+  pFilter.iSliceAlphaC0Offset = pSliceHeaderExt->sSliceHeader.iSliceAlphaC0Offset;
+  pFilter.iSliceBetaOffset     = pSliceHeaderExt->sSliceHeader.iSliceBetaOffset;
+
+  pFilter.pLoopf = &pCtx->sDeblockingFunc;
+
+  /* Step2: macroblock deblocking */
+  if (0 == iFilterIdc || 2 == iFilterIdc) { // any other idc value leaves the slice unfiltered
+    iNextMbXyIndex = pSliceHeaderExt->sSliceHeader.iFirstMbInSlice;
+    pCurDqLayer->iMbX  = iNextMbXyIndex % iMbWidth;
+    pCurDqLayer->iMbY  = iNextMbXyIndex / iMbWidth;
+    pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
+
+    do {
+      iBoundryFlag = DeblockingAvailableNoInterlayer (pCurDqLayer, iFilterIdc);
+
+      pDeblockMb (pCurDqLayer, &pFilter, iBoundryFlag);
+
+      ++iCountNumMb;
+      if (iCountNumMb >= iTotalNumMb) { // every MB counted for this slice has been processed
+        break;
+      }
+      // Next MB: via FmoNextMb when the PPS declares more than one slice group, otherwise the next index.
+      if (pSliceHeaderExt->sSliceHeader.pPps->uiNumSliceGroups > 1) {
+        iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
+      } else {
+        ++iNextMbXyIndex;
+      }
+      if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbCount) { // slice group boundary or end of a frame
+        break;
+      }
+
+      pCurDqLayer->iMbX  = iNextMbXyIndex % iMbWidth;
+      pCurDqLayer->iMbY  = iNextMbXyIndex / iMbWidth;
+      pCurDqLayer->iMbXyIndex = iNextMbXyIndex;
+    } while (1);
+  }
+}
+/*!
+ * \brief   deblocking module initialize
+ *
+ * \param   pFunc   function table that receives the deblocking kernels
+ * \param   iCpu    CPU feature flags tested against the WELS_CPU_* masks
+ *
+ * \return  NONE
+ */
+
+void  DeblockingInit (SDeblockingFunc*  pFunc,  int32_t iCpu) {
+  pFunc->pfLumaDeblockingLT4Ver     = DeblockLumaLt4V_c; // C kernels are installed first as the default
+  pFunc->pfLumaDeblockingEQ4Ver     = DeblockLumaEq4V_c;
+  pFunc->pfLumaDeblockingLT4Hor     = DeblockLumaLt4H_c;
+  pFunc->pfLumaDeblockingEQ4Hor     = DeblockLumaEq4H_c;
+
+  pFunc->pfChromaDeblockingLT4Ver   = DeblockChromaLt4V_c;
+  pFunc->pfChromaDeblockingEQ4Ver   = DeblockChromaEq4V_c;
+  pFunc->pfChromaDeblockingLT4Hor   = DeblockChromaLt4H_c;
+  pFunc->pfChromaDeblockingEQ4Hor   = DeblockChromaEq4H_c;
+  // The single-plane ("2") chroma kernels are assigned only here; no SIMD branch below replaces them.
+  pFunc->pfChromaDeblockingLT4Ver2  = DeblockChromaLt4V2_c;
+  pFunc->pfChromaDeblockingEQ4Ver2  = DeblockChromaEq4V2_c;
+  pFunc->pfChromaDeblockingLT4Hor2  = DeblockChromaLt4H2_c;
+  pFunc->pfChromaDeblockingEQ4Hor2  = DeblockChromaEq4H2_c;
+
+#ifdef X86_ASM
+  if (iCpu & WELS_CPU_SSSE3) { // replace the eight two-plane/luma kernels with SSSE3 versions
+    pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_ssse3;
+    pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_ssse3;
+    pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_ssse3;
+    pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_ssse3;
+    pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_ssse3;
+    pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_ssse3;
+    pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_ssse3;
+    pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_ssse3;
+  }
+#endif
+
+#if defined(HAVE_NEON)
+  if (iCpu & WELS_CPU_NEON) { // replace the same eight kernels with NEON versions
+    pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_neon;
+    pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_neon;
+    pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_neon;
+    pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_neon;
+
+    pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_neon;
+    pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_neon;
+    pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_neon;
+    pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_neon;
+  }
+#endif
+
+#if defined(HAVE_NEON_AARCH64)
+  if (iCpu & WELS_CPU_NEON) { // replace the same eight kernels with AArch64 NEON versions
+    pFunc->pfLumaDeblockingLT4Ver   = DeblockLumaLt4V_AArch64_neon;
+    pFunc->pfLumaDeblockingEQ4Ver   = DeblockLumaEq4V_AArch64_neon;
+    pFunc->pfLumaDeblockingLT4Hor   = DeblockLumaLt4H_AArch64_neon;
+    pFunc->pfLumaDeblockingEQ4Hor   = DeblockLumaEq4H_AArch64_neon;
+
+    pFunc->pfChromaDeblockingLT4Ver = DeblockChromaLt4V_AArch64_neon;
+    pFunc->pfChromaDeblockingEQ4Ver = DeblockChromaEq4V_AArch64_neon;
+    pFunc->pfChromaDeblockingLT4Hor = DeblockChromaLt4H_AArch64_neon;
+    pFunc->pfChromaDeblockingEQ4Hor = DeblockChromaEq4H_AArch64_neon;
+  }
+#endif
+}
+
+} // namespace WelsDec
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -1,2027 +1,2402 @@
-/*!
- * \copy
- *     Copyright (c)  2008-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- *  Abstract
- *      current slice decoding
- *
- *  History
- *      07/10/2008 Created
- *      08/09/2013 Modified
- *
- *****************************************************************************/
-
-
-#include "deblocking.h"
-
-#include "decode_slice.h"
-
-#include "parse_mb_syn_cavlc.h"
-#include "parse_mb_syn_cabac.h"
-#include "rec_mb.h"
-#include "mv_pred.h"
-
-#include "cpu_core.h"
-
-namespace WelsDec {
-
-int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
-
-  int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount;
-
-  int32_t iCurLayerWidth  = pCurLayer->iMbWidth << 4;
-  int32_t iCurLayerHeight = pCurLayer->iMbHeight << 4;
-
-  int32_t iNextMbXyIndex = 0;
-  PFmo pFmo = pCtx->pFmo;
-
-  int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice;
-  int32_t iCountNumMb = 0;
-  PDeblockingFilterMbFunc pDeblockMb;
-
-  if (!pCtx->bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) {
-    return ERR_INFO_WIDTH_MISMATCH;
-  }
-
-  iNextMbXyIndex   = pSliceHeader->iFirstMbInSlice;
-  pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth;
-  pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth;
-  pCurLayer->iMbXyIndex = iNextMbXyIndex;
-
-  if (0 == iNextMbXyIndex) {
-    pCurLayer->pDec->iSpsId = pCtx->pSps->iSpsId;
-    pCurLayer->pDec->iPpsId = pCtx->pPps->iPpsId;
-
-    pCurLayer->pDec->uiQualityId = pCurLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
-  }
-
-  do {
-    if (iCountNumMb >= iTotalNumMb) {
-      break;
-    }
-
-    if (!pCtx->pParam->bParseOnly) { //for parse only, actual recon MB unnecessary
-      if (WelsTargetMbConstruction (pCtx)) {
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-                 "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
-                 pCurLayer->iMbX, pCurLayer->iMbY, pCurSlice->eSliceType);
-
-        return ERR_INFO_MB_RECON_FAIL;
-      }
-    }
-
-    ++iCountNumMb;
-    if (!pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { //already con-ed, overwrite
-      pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
-      pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0);
-      ++pCtx->iTotalNumMbRec;
-    }
-
-    if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-               "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
-               pCtx->iTotalNumMbRec, iTotalMbTargetLayer);
-
-      return ERR_INFO_MB_NUM_EXCEED_FAIL;
-    }
-
-    if (pSliceHeader->pPps->uiNumSliceGroups > 1) {
-      iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
-    } else {
-      ++iNextMbXyIndex;
-    }
-    if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame
-      break;
-    }
-    pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth;
-    pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth;
-    pCurLayer->iMbXyIndex = iNextMbXyIndex;
-  } while (1);
-
-  pCtx->pDec->iWidthInPixel  = iCurLayerWidth;
-  pCtx->pDec->iHeightInPixel = iCurLayerHeight;
-
-  if ((pCurSlice->eSliceType != I_SLICE) && (pCurSlice->eSliceType != P_SLICE))
-    return ERR_NONE; //no error but just ignore the type unsupported
-
-  if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on
-    return ERR_NONE;
-
-  pDeblockMb = WelsDeblockingMb;
-
-  if (1 == pSliceHeader->uiDisableDeblockingFilterIdc
-      || pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) {
-    return ERR_NONE;//NO_SUPPORTED_FILTER_IDX
-  } else {
-    WelsDeblockingFilterSlice (pCtx, pDeblockMb);
-  }
-  // any other filter_idc not supported here, 7/22/2010
-
-  return ERR_NONE;
-}
-
-int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
-                                       uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) {
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t i, iIndex, iOffset;
-
-  if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-    for (i = 0; i < 4; i++) {
-      iIndex = g_kuiMbCountScan4Idx[i << 2];
-      if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4]
-          || pCurLayer->pNzc[iMbXy][iIndex + 5]) {
-        iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2);
-        pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6));
-      }
-    }
-  } else {
-    // luma.
-    const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-    int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
-    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc +  0);
-    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc +  2);
-    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc +  8);
-    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10);
-  }
-
-  const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
-  // Cb.
-  pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16);
-  // Cr.
-  pCtx->pIdctFourResAddPredFunc (pDstV, iStrideC, pScaledTCoeff + 5 * 64, pNzc + 18);
-
-  return ERR_NONE;
-}
-int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  uint8_t*  pDstY, *pDstCb, *pDstCr;
-
-  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
-  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
-
-  pDstY  = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
-  pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
-  pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
-
-  GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
-  WelsMbInterSampleConstruction (pCtx, pCurLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride);
-
-  pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
-    pCurLayer->pNzc[pCurLayer->iMbXyIndex]); // set all none-zero nzc to 1; dbk can be opti!
-  return ERR_NONE;
-}
-
-void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx) {
-  const int32_t kiQMul = pCtx->bUseScalingList ? pCtx->pDequant_coeff4x4[0][iQp][0] : (g_kuiDequantCoeff[iQp][0] << 4);
-#define STRIDE 16
-  int32_t i;
-  int32_t iTemp[16]; //FIXME check if this is a good idea
-  int16_t* pBlk = pBlock;
-  static const int32_t kiXOffset[4] = {0, STRIDE, STRIDE << 2,  5 * STRIDE};
-  static const int32_t kiYOffset[4] = {0, STRIDE << 1, STRIDE << 3, 10 * STRIDE};
-
-  for (i = 0; i < 4; i++) {
-    const int32_t kiOffset = kiYOffset[i];
-    const int32_t kiX1 = kiOffset + kiXOffset[2];
-    const int32_t kiX2 = STRIDE + kiOffset;
-    const int32_t kiX3 = kiOffset + kiXOffset[3];
-    const int32_t kiI4 = i << 2; // 4*i
-    const int32_t kiZ0 = pBlk[kiOffset] + pBlk[kiX1];
-    const int32_t kiZ1 = pBlk[kiOffset] - pBlk[kiX1];
-    const int32_t kiZ2 = pBlk[kiX2] - pBlk[kiX3];
-    const int32_t kiZ3 = pBlk[kiX2] + pBlk[kiX3];
-
-    iTemp[kiI4]  = kiZ0 + kiZ3;
-    iTemp[1 + kiI4] = kiZ1 + kiZ2;
-    iTemp[2 + kiI4] = kiZ1 - kiZ2;
-    iTemp[3 + kiI4] = kiZ0 - kiZ3;
-  }
-
-  for (i = 0; i < 4; i++) {
-    const int32_t kiOffset = kiXOffset[i];
-    const int32_t kiI4 = 4 + i;
-    const int32_t kiZ0 = iTemp[i] + iTemp[4 + kiI4];
-    const int32_t kiZ1 = iTemp[i] - iTemp[4 + kiI4];
-    const int32_t kiZ2 = iTemp[kiI4] - iTemp[8 + kiI4];
-    const int32_t kiZ3 = iTemp[kiI4] + iTemp[8 + kiI4];
-
-    pBlk[kiOffset] = ((kiZ0 + kiZ3) * kiQMul + (1 << 5)) >> 6; //FIXME think about merging this into decode_resdual
-    pBlk[kiYOffset[1] + kiOffset] = ((kiZ1 + kiZ2) * kiQMul + (1 << 5)) >> 6;
-    pBlk[kiYOffset[2] + kiOffset] = ((kiZ1 - kiZ2) * kiQMul + (1 << 5)) >> 6;
-    pBlk[kiYOffset[3] + kiOffset] = ((kiZ0 - kiZ3) * kiQMul + (1 << 5)) >> 6;
-  }
-#undef STRIDE
-}
-
-int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput) {
-//seems IPCM should not enter this path
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-
-  WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer);
-
-  if (IS_INTRA16x16 (pCurLayer->pMbType[iMbXy])) {
-    RecI16x16Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
-  } else if (IS_INTRA8x8 (pCurLayer->pMbType[iMbXy])) {
-    RecI8x8Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
-  } else if (IS_INTRA4x4 (pCurLayer->pMbType[iMbXy])) {
-    RecI4x4Mb (iMbXy, pCtx, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer);
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  uint8_t*  pDstY, *pDstCb, *pDstCr;
-
-  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
-  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
-
-  pDstY  = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
-  pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
-  pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
-
-  GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
-
-  return ERR_NONE;
-}
-
-int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  if (MB_TYPE_INTRA_PCM == pCurLayer->pMbType[pCurLayer->iMbXyIndex]) {
-    //already decoded and reconstructed when parsing
-    return ERR_NONE;
-  } else if (IS_INTRA (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) {
-    WelsMbIntraPredictionConstruction (pCtx, pCurLayer, 1);
-  } else if (IS_INTER (pCurLayer->pMbType[pCurLayer->iMbXyIndex])) { //InterMB
-    if (0 == pCurLayer->pCbp[pCurLayer->iMbXyIndex]) { //uiCbp==0 include SKIP
-      WelsMbInterPrediction (pCtx, pCurLayer);
-    } else {
-      WelsMbInterConstruction (pCtx, pCurLayer);
-    }
-  } else {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d",
-             pCurLayer->pMbType[pCurLayer->iMbXyIndex]);
-    return ERR_INFO_MB_RECON_FAIL;
-  }
-
-  return ERR_NONE;
-}
-
-void WelsChromaDcIdct (int16_t* pBlock) {
-  int32_t iStride = 32;
-  int32_t iXStride = 16;
-  int32_t iStride1 = iXStride + iStride;
-  int16_t* pBlk = pBlock;
-  int32_t iA, iB, iC, iD, iE;
-
-  iA = pBlk[0];
-  iB = pBlk[iXStride];
-  iC = pBlk[iStride];
-  iD = pBlk[iStride1];
-
-  iE = iA - iB;
-  iA += iB;
-  iB = iC - iD;
-  iC += iD;
-
-  pBlk[0] = (iA + iC);
-  pBlk[iXStride] = (iE + iB);
-  pBlk[iStride] = (iA - iC);
-  pBlk[iStride1] = (iE - iB);
-}
-
-void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail) {  //left
-    pSampleAvail[ 6] =
-      pSampleAvail[12] =
-        pSampleAvail[18] =
-          pSampleAvail[24] = 1;
-  }
-  if (pNeighAvail->iLeftTopAvail) { //top_left
-    pSampleAvail[0] = 1;
-  }
-  if (pNeighAvail->iTopAvail) { //top
-    pSampleAvail[1] =
-      pSampleAvail[2] =
-        pSampleAvail[3] =
-          pSampleAvail[4] = 1;
-  }
-  if (pNeighAvail->iRightTopAvail) { //top_right
-    pSampleAvail[5] = 1;
-  }
-}
-
-void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {   //left
-    pSampleAvail[ 6] =
-      pSampleAvail[12] =
-        pSampleAvail[18] =
-          pSampleAvail[24] = 1;
-  }
-  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {  //top_left
-    pSampleAvail[0] = 1;
-  }
-  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {  //top
-    pSampleAvail[1] =
-      pSampleAvail[2] =
-        pSampleAvail[3] =
-          pSampleAvail[4] = 1;
-  }
-  if (pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType)) {  //top_right
-    pSampleAvail[5] = 1;
-  }
-}
-void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail) {
-    *pSampleAvail = (1 << 2);
-  }
-  if (pNeighAvail->iLeftTopAvail) {
-    *pSampleAvail |= (1 << 1);
-  }
-  if (pNeighAvail->iTopAvail) {
-    *pSampleAvail |= 1;
-  }
-}
-
-void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
-  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType)) {
-    *pSampleAvail = (1 << 2);
-  }
-  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType)) {
-    *pSampleAvail |= (1 << 1);
-  }
-  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType)) {
-    *pSampleAvail |= 1;
-  }
-}
-
-int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
-                           PBitStringAux pBs,
-                           PDqLayer pCurDqLayer) {
-  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  int32_t iFinalMode, i;
-
-  uint8_t uiNeighAvail = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
-  uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
-  for (i = 0; i < 16; i++) {
-    int32_t iPrevIntra4x4PredMode = 0;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
-      iPrevIntra4x4PredMode = iCode;
-    } else {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      iPrevIntra4x4PredMode = uiCode;
-    }
-    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
-
-    int8_t iBestMode;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      if (iPrevIntra4x4PredMode == -1)
-        iBestMode = kiPredMode;
-      else
-        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
-    } else {
-      if (iPrevIntra4x4PredMode) {
-        iBestMode = kiPredMode;
-      } else {
-        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
-        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
-      }
-    }
-
-    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false);
-    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
-    }
-
-    pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
-
-    pIntraPredMode[g_kuiScan8[i]] = iBestMode;
-
-    iSampleAvail[g_kuiCache30ScanIdx[i]] = 1;
-  }
-  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
-  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
-  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
-  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
-
-  if (pCtx->pSps->uiChromaFormatIdc == 0)//no need parse chroma
-    return ERR_NONE;
-
-  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
-    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
-  } else {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  }
-
-  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
-      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
-                           PBitStringAux pBs,
-                           PDqLayer pCurDqLayer) {
-  // Similar with Intra_4x4, can put them together when needed
-  int32_t iSampleAvail[5 * 6] = { 0 }; //initialize as 0
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  int32_t iFinalMode, i;
-
-  uint8_t uiNeighAvail = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
-  // Top-Right : Left : Top-Left : Top
-  uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
-
-  pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail;
-
-  for (i = 0; i < 4; i++) {
-    int32_t iPrevIntra4x4PredMode = 0;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode));
-      iPrevIntra4x4PredMode = iCode;
-    } else {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      iPrevIntra4x4PredMode = uiCode;
-    }
-    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2);
-
-    int8_t iBestMode;
-    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-      if (iPrevIntra4x4PredMode == -1)
-        iBestMode = kiPredMode;
-      else
-        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
-    } else {
-      if (iPrevIntra4x4PredMode) {
-        iBestMode = kiPredMode;
-      } else {
-        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode));
-        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
-      }
-    }
-
-    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true);
-
-    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
-    }
-
-    for (int j = 0; j < 4; j++) {
-      pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode;
-      pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode;
-      iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1;
-    }
-  }
-  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
-  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
-  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
-  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
-
-  if (pCtx->pSps->uiChromaFormatIdc == 0)
-    return ERR_NONE;
-
-  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
-    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
-  } else {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  }
-
-  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
-      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
-                             PDqLayer pCurDqLayer) {
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  uint8_t uiNeighAvail = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
-  uint32_t uiCode;
-  int32_t iCode;
-  pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiNeighAvail);
-
-  if (CheckIntra16x16PredMode (uiNeighAvail,
-                               &pCurDqLayer->pIntraPredMode[iMbXy][7])) { //invalid iPredMode, must stop decoding
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE);
-  }
-  if (pCtx->pSps->uiChromaFormatIdc == 0)
-    return ERR_NONE;
-
-  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
-    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
-    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
-  } else {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
-    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-    }
-    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
-  }
-  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
-      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-  SWelsNeighAvail sNeighAvail;
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t i;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
-
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
-  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
-  WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
-  if (uiMbType > 25) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-  } else if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17
-             && uiMbType <= 24))) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-  } else if (25 == uiMbType) {   //I_PCM
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
-    WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
-    pSlice->iLastDeltaQp = 0;
-    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-    if (uiEosFlag) {
-      RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-    }
-    return ERR_NONE;
-  } else if (0 == uiMbType) { //I4x4
-    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-    if (pCtx->pPps->bTransform8x8ModeFlag) {
-      // Transform 8x8 cabac will be added soon
-      WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
-    }
-    if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-      uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-    } else {
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-    }
-    //get uiCbp for I4x4
-    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
-    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0;
-    uiCbpLuma = uiCbp & 15;
-  } else { //I16x16;
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
-    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
-  }
-
-  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-  pCurLayer->pCbfDc[iMbXy] = 0;
-
-  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
-    }
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
-    int32_t iQpDelta, iId8x8, iId4x4;
-    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
-    if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
-    }
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
-                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
-      //step2: Luma AC
-      if (uiCbpLuma) {
-        for (i = 0; i < 16; i++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
-                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
-                            pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else { //pNonZeroCount = 0
-        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        // Transform 8x8 support for CABAC
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
-                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
-                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            int32_t iIdx = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
-                                g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
-                                pCurLayer->pLumaQp[iMbXy], pCtx));
-              iIdx++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-    int32_t iMbResProperty;
-    //chroma
-    //step1: DC
-    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
-      //Cb Cr
-      for (i = 0; i < 2; i++) {
-        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
-                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-      }
-    }
-
-    //step2: AC
-    if (2 == uiCbpChroma) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        int32_t iIdx = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
-                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
-                            pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-          iIdx++;
-        }
-      }
-      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
-      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
-      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
-      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
-    } else {
-      ST16 (&pCurLayer->pNzc[iMbXy][16], 0);
-      ST16 (&pCurLayer->pNzc[iMbXy][20], 0);
-      ST16 (&pCurLayer->pNzc[iMbXy][18], 0);
-      ST16 (&pCurLayer->pNzc[iMbXy][22], 0);
-    }
-  } else {
-    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-  }
-
-  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-  if (uiEosFlag) {
-    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // Per-MB entry point for CABAC I slices; pNalCur is not referenced in this body
-  WELS_READ_VERIFY (WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag)); // all parsing is delegated; presumably the macro returns early on failure -- confirm against its definition
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) { // Parses one macroblock of a CABAC-coded P slice: MB type, prediction info, CBP, QP delta and residual; uiEosFlag is filled by ParseEndOfSliceCabac
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t iMbResProperty;
-  int32_t i;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
-
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); // scratch non-zero-count cache for this MB (48 uint8_t entries; presumably 16-byte aligned -- confirm against the macro)
-
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-
-  WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
-  // uiMbType = 4 is not allowed: it takes the else branch, wraps around in the unsigned "-= 5" and fails the "> 25" check.
-  if (uiMbType < 4) { //Inter mode
-    int16_t pMotionVector[LIST_A][30][MV_A];
-    int16_t pMvdCache[LIST_A][30][MV_A];
-    int8_t  pRefIndex[LIST_A][30];
-    pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
-    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
-    WELS_READ_VERIFY (ParseInterMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0; // same value was already stored before the MB type was parsed
-  } else { //Intra mode
-    uiMbType -= 5; // rebase so that 0 is Intra4x4 and 25 is I_PCM, as tested below
-    if (uiMbType > 25)
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) // with uiChromaFormatIdc == 0, types 5..12 and 17..24 are rejected
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-
-    if (25 == uiMbType) {   //I_PCM
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
-      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
-      pSlice->iLastDeltaQp = 0;
-      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-      if (uiEosFlag) {
-        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-      }
-      return ERR_NONE; // I_PCM carries no CBP / QP delta / residual, so the rest of the function is skipped
-    } else { //normal Intra mode
-      if (0 == uiMbType) { //Intra4x4
-        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-        if (pCtx->pPps->bTransform8x8ModeFlag) {
-          WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
-        }
-        if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) { // the 8x8 transform flag turns the MB into Intra8x8
-          uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-        } else {
-          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
-        }
-      } else { //Intra16x16
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // low two bits of (type - 1) select the prediction mode
-        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // CBP comes from the MB type, it is not parsed separately
-        uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0; // chroma CBP is forced to 0 when uiChromaFormatIdc == 0
-        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
-        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
-      }
-    }
-  }
-
-  ST32 (&pCurLayer->pNzc[iMbXy][0], 0); // reset the stored non-zero counts of this MB (offsets 0..23) before residual parsing
-  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
-  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) { // every type except Intra16x16 reads its CBP from the bitstream
-    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
-
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; // a zero CBP clears the remembered delta QP
-    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
-    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) { // residual data (and a QP delta) follows
-
-    if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
-      // Needs modification when B pictures are added
-      bool bNeedParseTransformSize8x8Flag =
-        (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
-          || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
-         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
-         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
-         && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
-         && (pCtx->pPps->bTransform8x8ModeFlag));
-
-      if (bNeedParseTransformSize8x8Flag) {
-        WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
-                          pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
-      }
-    }
-
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0])); // zero all 384 coefficient slots of this MB
-
-    int32_t iQpDelta, iId8x8, iId4x4;
-
-    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
-    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) { // derive both chroma QPs from the new luma QP plus the PPS offset, clipped to 0..51
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
-                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
-      //step2: Luma AC
-      if (uiCbpLuma) {
-        for (i = 0; i < 16; i++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
-                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
-                            pCurLayer->pLumaQp[iMbXy], pCtx));
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy luma counts back from the stride-8 cache, rows 1..4 starting at column 1
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
-        // Transform 8x8 support for CABAC
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) { // one luma CBP bit per 8x8 block
-            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
-                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
-                              IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
-                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0); // uncoded 8x8 block: clear its cached counts with two 16-bit stores
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpLuma & (1 << iId8x8)) {
-            int32_t iIdx = (iId8x8 << 2); // first of the four 4x4 blocks inside this 8x8 block
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
-                                g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
-                                pCurLayer->pLumaQp[iMbXy],
-                                pCtx));
-              iIdx++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-
-    //chroma
-    //step1: DC
-    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
-      for (i = 0; i < 2; i++) { // i == 0 uses the U property, i == 1 the V property
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        else
-          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
-
-        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
-                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-      }
-    }
-    //step2: AC
-    if (2 == uiCbpChroma) { // AC coefficients are parsed only when the chroma CBP equals 2
-      for (i = 0; i < 2; i++) {
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        else
-          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
-        int32_t index = 16 + (i << 2); // chroma block indices start at 16 (first plane) and 20 (second plane)
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
-                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
-                            iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
-          index++;
-        }
-      }
-      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); // copy chroma counts back from the cache (column 6, rows 1, 2, 4 and 5)
-      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
-      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
-      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
-    } else {
-      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-    }
-  } else {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // no residual: reuse the QP of the last MB, no delta is parsed
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-  }
-
-  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
-  if (uiEosFlag) { // last MB of the slice: hand the CABAC engine state back to the bit-string reader
-    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // Per-MB entry point for CABAC P slices: handles skipped MBs here, otherwise defers to WelsDecodeMbCabacPSliceBaseMode0; pNalCur is not referenced in this body
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
-  uint32_t uiCode;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int32_t i;
-  SWelsNeighAvail uiNeighAvail;
-  pCurLayer->pCbp[iMbXy] = 0; // per-MB defaults, set before anything is parsed
-  pCurLayer->pCbfDc[iMbXy] = 0;
-  pCurLayer->pChromaPredMode[iMbXy] = C_PRED_DC;
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  GetNeighborAvailMbType (&uiNeighAvail, pCurLayer);
-  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &uiNeighAvail, uiCode));
-
-  if (uiCode) { // skip flag set: no further MB syntax is parsed except the end-of-slice flag
-    int16_t pMv[2] = {0};
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
-    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
-    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
-
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16); // all 16 list-0 reference indices are set to 0
-    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete); // also set when bRPLRError is set or list-0 entry 0 is missing or not complete
-    //predict mv
-    PredPSkipMvFromNeighbor (pCurLayer, pMv);
-    for (i = 0; i < 16; i++) {
-      ST32 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)pMv); // both 16-bit MV components are written with one 32-bit store
-      ST32 (pCurLayer->pMvd[0][iMbXy][i], 0);
-    }
-
-    //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
-    //  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
-    //}
-
-    //reset rS
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // reuse the QP of the previous MB (the original comment marked this as uncertain)
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-    //for neighboring CABAC usage
-    pSlice->iLastDeltaQp = 0;
-
-    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag)); // NOTE(review): unlike the non-skip path, RestoreCabacDecEngineToBS is not called here when uiEosFlag is set -- confirm this is intended
-
-    return ERR_NONE;
-  }
-
-  WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &uiNeighAvail, uiEosFlag));
-  return ERR_NONE;
-}
-// Calculate the dequant coefficient tables from the SPS/PPS scaling lists and set pCtx->bUseScalingList; always returns ERR_NONE
-int32_t  WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
-  if (pCtx->pSps->bSeqScalingMatrixPresentFlag || pCtx->pPps->bPicScalingMatrixPresentFlag) {
-    pCtx->bUseScalingList = true;
-
-    if (!pCtx->bDequantCoeff4x4Init || (pCtx->iDequantCoeffPpsid != pCtx->pPps->iPpsId)) { // rebuild only on first use or when the PPS id differs from the cached one
-      int i, q, x, y;
-      //Init dequant coeff value for different QP
-      for (i = 0; i < 6; i++) { // six scaling lists for each transform size
-        pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i];
-        pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i];
-        for (q = 0; q < 51; q++) { // NOTE(review): fills q = 0..50 only; confirm whether an entry for q == 51 is also required
-          for (x = 0; x < 16; x++) { // 4x4: the PPS list wins over the SPS list when bPicScalingMatrixPresentFlag is set
-            pCtx->pDequant_coeff4x4[i][q][x] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList4x4[i][x] *
-                                               g_kuiDequantCoeff[q][x & 0x07] : pCtx->pSps->iScalingList4x4[i][x] * g_kuiDequantCoeff[q][x & 0x07];
-          }
-          for (y = 0; y < 64; y++) { // 8x8: same PPS-over-SPS choice, multiplied by g_kuiMatrixV indexed with q % 6
-            pCtx->pDequant_coeff8x8[i][q][y] = pCtx->pPps->bPicScalingMatrixPresentFlag ? pCtx->pPps->iScalingList8x8[i][y] *
-                                               g_kuiMatrixV[q % 6][y / 8][y % 8] : pCtx->pSps->iScalingList8x8[i][y] * g_kuiMatrixV[q % 6][y / 8][y % 8];
-          }
-        }
-      }
-      pCtx->bDequantCoeff4x4Init = true;
-      pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId; // remember which PPS the tables were built for
-    }
-  } else
-    pCtx->bUseScalingList = false;
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) { // Decodes the macroblocks of the current slice one by one, choosing the per-MB parser from entropy mode and slice type; bFirstSliceInLayer is not referenced in this body
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PFmo pFmo = pCtx->pFmo;
-  int32_t iRet;
-  int32_t iNextMbXyIndex, iSliceIdc;
-
-  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeaderExt pSliceHeaderExt = &pSlice->sSliceHeaderExt;
-  PSliceHeader pSliceHeader = &pSliceHeaderExt->sSliceHeader;
-  int32_t iMbX, iMbY;
-  const int32_t kiCountNumMb = pSliceHeader->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
-  uint32_t uiEosFlag = 0;
-  PWelsDecMbFunc pDecMbFunc;
-
-  pSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
-
-  if (pCtx->pPps->bEntropyCodingModeFlag) { // CABAC: reject the three adaptive inter-layer flags, then pick the CABAC parser
-    if (pSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
-        pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
-        pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-               "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
-      pCtx->iErrorCode |= dsBitstreamError;
-      return dsBitstreamError;
-    }
-    if (P_SLICE == pSliceHeader->eSliceType)
-      pDecMbFunc = WelsDecodeMbCabacPSlice;
-    else //I_SLICE. B_SLICE not supported now
-      pDecMbFunc = WelsDecodeMbCabacISlice;
-  } else { // CAVLC parsers
-    if (P_SLICE == pSliceHeader->eSliceType) {
-      pDecMbFunc = WelsDecodeMbCavlcPSlice;
-    } else { //I_SLICE
-      pDecMbFunc = WelsDecodeMbCavlcISlice;
-    }
-  }
-
-  if (pSliceHeader->pPps->bConstainedIntraPredFlag) { // constrained intra prediction selects a different set of neighbour helper callbacks
-    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
-    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleConstrain1;
-    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleConstrain1;
-  } else {
-    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
-    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleNormal;
-    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleNormal;
-  }
-
-  pCtx->eSliceType = pSliceHeader->eSliceType;
-  if (pCurLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) { // CABAC only: initialise contexts from slice QP / cabac_init_idc and start the decoding engine on the bit string
-    int32_t iQp = pSlice->sSliceHeaderExt.sSliceHeader.iSliceQp;
-    int32_t iCabacInitIdc = pSlice->sSliceHeaderExt.sSliceHeader.iCabacInitIdc;
-    WelsCabacContextInit (pCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
-    //InitCabacCtx (pCtx->pCabacCtx, pSlice->eSliceType, iCabacInitIdc, iQp);
-    pSlice->iLastDeltaQp = 0;
-    WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
-  }
-  //try to calculate  the dequant_coeff
-  WelsCalcDeqCoeffScalingList (pCtx); // return value is not checked
-
-  iNextMbXyIndex = pSliceHeader->iFirstMbInSlice;
-  iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
-  iMbY = iNextMbXyIndex / pCurLayer->iMbWidth; // error is introduced by multiple slices case, 11/23/2009
-  pSlice->iMbSkipRun = -1;
-  iSliceIdc = (pSliceHeader->iFirstMbInSlice << 7) + pCurLayer->uiLayerDqId; // slice tag stored per MB: first MB index shifted left by 7, plus the layer DQ id
-
-  pCurLayer->iMbX =  iMbX;
-  pCurLayer->iMbY = iMbY;
-  pCurLayer->iMbXyIndex = iNextMbXyIndex;
-
-  do {
-    if ((-1 == iNextMbXyIndex) || (iNextMbXyIndex >= kiCountNumMb)) { // slice group boundary or end of a frame
-      break;
-    }
-
-    pCurLayer->pSliceIdc[iNextMbXyIndex] = iSliceIdc;
-    pCtx->bMbRefConcealed = false;
-    iRet = pDecMbFunc (pCtx,  pNalCur, uiEosFlag);
-    pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] = pCtx->bMbRefConcealed; // recorded even when the MB parser failed, before the error return below
-    if (iRet != ERR_NONE) {
-      return iRet;
-    }
-
-    ++pSlice->iTotalMbInCurSlice;
-    if (uiEosFlag) { //end of slice
-      break;
-    }
-    if (pSliceHeader->pPps->uiNumSliceGroups > 1) { // with several slice groups the next MB comes from FmoNextMb, otherwise MBs are consecutive
-      iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
-    } else {
-      ++iNextMbXyIndex;
-    }
-    iMbX = iNextMbXyIndex % pCurLayer->iMbWidth;
-    iMbY = iNextMbXyIndex / pCurLayer->iMbWidth;
-    pCurLayer->iMbX =  iMbX;
-    pCurLayer->iMbY = iMbY;
-    pCurLayer->iMbXyIndex = iNextMbXyIndex;
-  } while (1);
-
-  return ERR_NONE;
-}
-
-int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) {
-  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBs              = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  SWelsNeighAvail sNeighAvail;
-  int32_t iMbResProperty;
-
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  const int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int32_t i;
-  int32_t iRet = ERR_NONE;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
-  uiMbType = uiCode;
-  if (uiMbType > 25)
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-  if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-
-  if (25 == uiMbType) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
-    int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
-    int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
-
-    int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
-    int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
-
-    uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
-    uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
-    uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
-
-    uint8_t* pTmpBsBuf;
-
-
-    int32_t i;
-    int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
-    int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
-
-    int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
-
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
-
-    //step 1: locating bit-stream pointer [must align into integer byte]
-    pBs->pCurBuf -= iIndex;
-
-    //step 2: copy pixel from bit-stream into fdec [reconstruction]
-    pTmpBsBuf = pBs->pCurBuf;
-    if (!pCtx->pParam->bParseOnly) {
-      for (i = 0; i < 16; i++) { //luma
-        memcpy (pDecY , pTmpBsBuf, iCopySizeY);
-        pDecY += iDecStrideL;
-        pTmpBsBuf += 16;
-      }
-      for (i = 0; i < 8; i++) { //cb
-        memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
-        pDecU += iDecStrideC;
-        pTmpBsBuf += 8;
-      }
-      for (i = 0; i < 8; i++) { //cr
-        memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
-        pDecV += iDecStrideC;
-        pTmpBsBuf += 8;
-      }
-    }
-
-    pBs->pCurBuf += 384;
-
-    //step 3: update QP and pNonZeroCount
-    pCurLayer->pLumaQp[iMbXy] = 0;
-    memset (pCurLayer->pChromaQp[iMbXy], 0, sizeof (pCurLayer->pChromaQp[iMbXy]));
-    memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy]));   //Rec. 9.2.1 for PCM, nzc=16
-    WELS_READ_VERIFY (InitReadBits (pBs, 0));
-    return ERR_NONE;
-  } else if (0 == uiMbType) { //reference to JM
-    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-    if (pCtx->pPps->bTransform8x8ModeFlag) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
-      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-      }
-    }
-    if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-    } else {
-      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-    }
-
-    //uiCbp
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
-    uiCbp = uiCode;
-    //G.9.1 Alternative parsing process for coded pBlock pattern
-    if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-    if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-
-    if (pCtx->pSps->uiChromaFormatIdc)
-      uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
-    else
-      uiCbp = g_kuiIntra4x4CbpTable400[uiCbp];
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    uiCbpC = uiCbp >> 4;
-    uiCbpL = uiCbp & 15;
-  } else { //I_PCM exclude, we can ignore it
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-    uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
-    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
-    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
-  }
-
-  ST32A4 (&pNzc[0], 0);
-  ST32A4 (&pNzc[4], 0);
-  ST32A4 (&pNzc[8], 0);
-  ST32A4 (&pNzc[12], 0);
-  ST32A4 (&pNzc[16], 0);
-  ST32A4 (&pNzc[20], 0);
-
-  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
-    int32_t iQpDelta, iId8x8, iId4x4;
-
-    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
-    iQpDelta = iCode;
-
-    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
-                                       51)];
-    }
-
-
-    BsStartCavlc (pBs);
-
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
-                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-        return iRet;//abnormal
-      }
-      //step2: Luma AC
-      if (uiCbpL) {
-        for (i = 0; i < 16; i++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
-                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
-                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
-                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                  g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;//abnormal
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-
-    //chroma
-    //step1: DC
-    if (1 == uiCbpC || 2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
-                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-          return iRet;//abnormal
-        }
-      }
-    }
-
-    //step2: AC
-    if (2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        int32_t iIndex = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
-                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-          iIndex++;
-        }
-      }
-      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
-      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
-      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
-      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
-    }
-    BsEndCavlc (pBs);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer = pCtx->pCurDqLayer;
-  PBitStringAux pBs = pCurLayer->pBitStringAux;
-  PSliceHeaderExt pSliceHeaderExt = &pCurLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
-  int32_t iBaseModeFlag;
-  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
-  uint32_t uiCode;
-  intX_t iUsedBits;
-  if (pSliceHeaderExt->bAdaptiveBaseModeFlag == 1) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
-    iBaseModeFlag = uiCode;
-  } else {
-    iBaseModeFlag = pSliceHeaderExt->bDefaultBaseModeFlag;
-  }
-  if (!iBaseModeFlag) {
-    iRet = WelsActualDecodeMbCavlcISlice (pCtx);
-  } else {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
-             iBaseModeFlag);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
-  }
-  if (iRet) { //occur error when parsing, MUST STOP decoding
-    return iRet;
-  }
-
-  // check whether there is left bits to read next time in case multiple slices
-  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
-  // sub 1, for stop bit
-  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
-    uiEosFlag = 1;
-  }
-  if (iUsedBits > (pBs->iBits -
-                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
-             (int64_t) iUsedBits, pBs->iBits);
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) {
-  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBs              = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-
-  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
-  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
-
-  SWelsNeighAvail sNeighAvail;
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  const int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int32_t i;
-  int32_t iRet = ERR_NONE;
-  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
-  uint32_t uiCode;
-  int32_t iCode;
-  int32_t iMbResProperty;
-
-  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
-  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
-  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
-  uiMbType = uiCode;
-  if (uiMbType < 5) { //inter MB type
-    int16_t iMotionVector[LIST_A][30][MV_A];
-    int8_t  iRefIndex[LIST_A][30];
-    pCurLayer->pMbType[iMbXy] = g_ksInterMbTypeInfo[uiMbType].iType;
-    WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
-
-    if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
-      return iRet;//abnormal
-    }
-
-    if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
-      pCurLayer->pResidualPredFlag[iMbXy] =  uiCode;
-    } else {
-      pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
-    }
-
-    if (pCurLayer->pResidualPredFlag[iMbXy] == 0) {
-      pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-    } else {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
-    }
-  } else { //intra MB type
-    uiMbType -= 5;
-    if (uiMbType > 25)
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
-
-    if (25 == uiMbType) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
-      int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
-      int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
-
-      int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
-      int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
-
-      uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
-      uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
-      uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
-
-      uint8_t* pTmpBsBuf;
-
-      int32_t i;
-      int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
-      int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
-
-      int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
-
-      pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
-
-      //step 1: locating bit-stream pointer [must align into integer byte]
-      pBs->pCurBuf -= iIndex;
-
-      //step 2: copy pixel from bit-stream into fdec [reconstruction]
-      pTmpBsBuf = pBs->pCurBuf;
-      if (!pCtx->pParam->bParseOnly) {
-        for (i = 0; i < 16; i++) { //luma
-          memcpy (pDecY, pTmpBsBuf, iCopySizeY);
-          pDecY += iDecStrideL;
-          pTmpBsBuf += 16;
-        }
-
-        for (i = 0; i < 8; i++) { //cb
-          memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
-          pDecU += iDecStrideC;
-          pTmpBsBuf += 8;
-        }
-        for (i = 0; i < 8; i++) { //cr
-          memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
-          pDecV += iDecStrideC;
-          pTmpBsBuf += 8;
-        }
-      }
-
-      pBs->pCurBuf += 384;
-
-      //step 3: update QP and pNonZeroCount
-      pCurLayer->pLumaQp[iMbXy] = 0;
-      pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
-      //Rec. 9.2.1 for PCM, nzc=16
-      ST32A4 (&pNzc[0], 0x10101010);
-      ST32A4 (&pNzc[4], 0x10101010);
-      ST32A4 (&pNzc[8], 0x10101010);
-      ST32A4 (&pNzc[12], 0x10101010);
-      ST32A4 (&pNzc[16], 0x10101010);
-      ST32A4 (&pNzc[20], 0x10101010);
-      WELS_READ_VERIFY (InitReadBits (pBs, 0));
-      return ERR_NONE;
-    } else {
-      if (0 == uiMbType) {
-        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
-        if (pCtx->pPps->bTransform8x8ModeFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
-          pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
-          if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-            uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
-          }
-        }
-        if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-        } else {
-          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
-          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
-        }
-      } else { //I_PCM exclude, we can ignore it
-        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
-        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
-        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
-        uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
-        uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
-        WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
-        if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) != ERR_NONE) {
-          return iRet;
-        }
-      }
-    }
-  }
-
-  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
-    uiCbp = uiCode;
-    {
-      if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
-        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-      if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
-        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
-      if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
-
-        uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
-      } else //inter
-        uiCbp = pCtx->pSps->uiChromaFormatIdc ?  g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
-    }
-
-    pCurLayer->pCbp[iMbXy] = uiCbp;
-    uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
-    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
-
-    // Need modification when B picutre add in
-    bool bNeedParseTransformSize8x8Flag =
-      (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
-        || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
-       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
-       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
-       && (uiCbpL > 0)
-       && (pCtx->pPps->bTransform8x8ModeFlag));
-
-    if (bNeedParseTransformSize8x8Flag) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
-      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
-    }
-  }
-
-  ST32A4 (&pNzc[0], 0);
-  ST32A4 (&pNzc[4], 0);
-  ST32A4 (&pNzc[8], 0);
-  ST32A4 (&pNzc[12], 0);
-  ST32A4 (&pNzc[16], 0);
-  ST32A4 (&pNzc[20], 0);
-  if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) {
-    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-    }
-  }
-
-  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-    int32_t iQpDelta, iId8x8, iId4x4;
-    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
-    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
-    iQpDelta = iCode;
-
-    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
-    }
-
-    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
-    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
-    for (i = 0; i < 2; i++) {
-      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
-                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
-                                       51)];
-    }
-
-    BsStartCavlc (pBs);
-
-    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
-      //step1: Luma DC
-      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
-                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-        return iRet;//abnormal
-      }
-      //step2: Luma AC
-      if (uiCbpL) {
-        for (i = 0; i < 16; i++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
-                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
-                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    } else { //non-MB_TYPE_INTRA16x16
-      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
-                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      } else { // Normal T4x4
-        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
-          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
-          if (uiCbpL & (1 << iId8x8)) {
-            int32_t iIndex = (iId8x8 << 2);
-            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-              //Luma (DC and AC decoding together)
-              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
-                                                  g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
-                return iRet;//abnormal
-              }
-              iIndex++;
-            }
-          } else {
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0);
-            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
-          }
-        }
-        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1]));
-        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
-        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
-        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
-      }
-    }
-
-
-    //chroma
-    //step1: DC
-    if (1 == uiCbpC || 2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
-        else
-          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
-
-        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
-                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-          return iRet;//abnormal
-        }
-      }
-    } else {
-    }
-    //step2: AC
-    if (2 == uiCbpC) {
-      for (i = 0; i < 2; i++) { //Cb Cr
-        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
-          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
-        else
-          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
-
-        int32_t iIndex = 16 + (i << 2);
-        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
-          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
-                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
-                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
-            return iRet;//abnormal
-          }
-          iIndex++;
-        }
-      }
-      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1]));
-      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
-      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
-      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
-    }
-    BsEndCavlc (pBs);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
-  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
-  PBitStringAux pBs              = pCurLayer->pBitStringAux;
-  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
-  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
-  intX_t iUsedBits;
-  const int32_t iMbXy = pCurLayer->iMbXyIndex;
-  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
-  int32_t iBaseModeFlag, i;
-  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
-  uint32_t uiCode;
-
-  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
-  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
-
-  if (-1 == pSlice->iMbSkipRun) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
-    pSlice->iMbSkipRun = uiCode;
-    if (-1 == pSlice->iMbSkipRun) {
-      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
-    }
-  }
-  if (pSlice->iMbSkipRun--) {
-    int16_t iMv[2];
-
-    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
-    ST32A4 (&pNzc[0], 0);
-    ST32A4 (&pNzc[4], 0);
-    ST32A4 (&pNzc[8], 0);
-    ST32A4 (&pNzc[12], 0);
-    ST32A4 (&pNzc[16], 0);
-    ST32A4 (&pNzc[20], 0);
-
-    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
-    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
-    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
-    //predict iMv
-    PredPSkipMvFromNeighbor (pCurLayer, iMv);
-    for (i = 0; i < 16; i++) {
-      ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv);
-    }
-
-    //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
-    //  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
-    //}
-
-    //reset rS
-    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
-        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
-      pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
-      for (i = 0; i < 2; i++) {
-        pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
-                                         pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
-      }
-    }
-
-    pCurLayer->pCbp[iMbXy] = 0;
-  } else {
-    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
-      iBaseModeFlag = uiCode;
-    } else {
-      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
-    }
-    if (!iBaseModeFlag) {
-      iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
-    } else {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
-               iBaseModeFlag);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
-    }
-    if (iRet) { //occur error when parsing, MUST STOP decoding
-      return iRet;
-    }
-  }
-  // check whether there is left bits to read next time in case multiple slices
-  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
-  // sub 1, for stop bit
-  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
-    uiEosFlag = 1;
-  }
-  if (iUsedBits > (pBs->iBits -
-                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
-             (int64_t) iUsedBits, pBs->iBits);
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
-  }
-  return ERR_NONE;
-}
-
-void WelsBlockFuncInit (SBlockFunc*   pFunc,  int32_t iCpu) {
-  pFunc->pWelsSetNonZeroCountFunc   = WelsNonZeroCount_c;
-  pFunc->pWelsBlockZero16x16Func    = WelsBlockZero16x16_c;
-  pFunc->pWelsBlockZero8x8Func      = WelsBlockZero8x8_c;
-
-#ifdef HAVE_NEON
-  if (iCpu & WELS_CPU_NEON) {
-    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_neon;
-    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_neon;
-    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_neon;
-  }
-#endif
-
-#ifdef HAVE_NEON_AARCH64
-  if (iCpu & WELS_CPU_NEON) {
-    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_AArch64_neon;
-    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_AArch64_neon;
-    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_AArch64_neon;
-  }
-#endif
-
-#if defined(X86_ASM)
-  if (iCpu & WELS_CPU_SSE2) {
-    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_sse2;
-    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_sse2;
-    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_sse2;
-  }
-#endif
-
-}
-
-void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
-  int32_t i;
-  int16_t* pDst = pBlock;
-
-  for (i = 0; i < iH; i++) {
-    memset (pDst, uiVal, iW * sizeof (int16_t));
-    pDst += iStride;
-  }
-}
-void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
-  WelsBlockInit (pBlock, 16, 16, iStride, 0);
-}
-
-void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
-  WelsBlockInit (pBlock, 8, 8, iStride, 0);
-}
-
-} // namespace WelsDec
+/*!
+ * \copy
+ *     Copyright (c)  2008-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *  Abstract
+ *      current slice decoding
+ *
+ *  History
+ *      07/10/2008 Created
+ *      08/09/2013 Modified
+ *
+ *****************************************************************************/
+
+
+#include "deblocking.h"
+
+#include "decode_slice.h"
+
+#include "parse_mb_syn_cavlc.h"
+#include "parse_mb_syn_cabac.h"
+#include "rec_mb.h"
+#include "mv_pred.h"
+
+#include "cpu_core.h"
+
+namespace WelsDec {
+
+static inline int32_t iAbs (int32_t x) { // branch-free absolute value of x
+  static const int32_t INT_BITS = (sizeof (int) * CHAR_BIT) - 1; // bit width of int minus one, i.e. the sign-bit position
+  int32_t y = x >> INT_BITS; // 0 for x >= 0; for x < 0 this is -1 only where >> on a negative value is arithmetic (implementation-defined) -- NOTE(review): confirm for all target compilers
+  return (x ^ y) - y; // y == 0: returns x; y == -1: returns ~x + 1 == -x -- NOTE(review): overflows for the minimum int32_t value
+}
+
+extern PPicture AllocPicture (PWelsDecoderContext pCtx, const int32_t kiPicWidth, const int32_t kiPicHeight);
+
+int32_t WelsTargetSliceConstruction (PWelsDecoderContext pCtx) { // walks the MBs of the current slice of pCtx->pCurDqLayer, reconstructs each one (unless bParseOnly), then deblocks the slice; returns ERR_NONE or an ERR_INFO_* code
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iTotalMbTargetLayer = pSliceHeader->pSps->uiTotalMbCount; // used below as the upper bound for MB indices and for pCtx->iTotalNumMbRec
+
+  int32_t iCurLayerWidth  = pCurLayer->iMbWidth << 4; // MB count * 16; stored as iWidthInPixel after the loop
+  int32_t iCurLayerHeight = pCurLayer->iMbHeight << 4; // MB count * 16; stored as iHeightInPixel after the loop
+
+  int32_t iNextMbXyIndex = 0;
+  PFmo pFmo = pCtx->pFmo;
+
+  int32_t iTotalNumMb = pCurSlice->iTotalMbInCurSlice; // number of MBs to process for this slice
+  int32_t iCountNumMb = 0; // MBs processed so far in this call
+  PDeblockingFilterMbFunc pDeblockMb;
+
+  if (!pCtx->bAvcBasedFlag && iCurLayerWidth != pCtx->iCurSeqIntervalMaxPicWidth) { // width check applies only when bAvcBasedFlag is false
+    return ERR_INFO_WIDTH_MISMATCH;
+  }
+
+  iNextMbXyIndex   = pSliceHeader->iFirstMbInSlice; // start at the first MB of the slice
+  pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth; // column of the MB index
+  pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth; // row of the MB index
+  pCurLayer->iMbXyIndex = iNextMbXyIndex;
+
+  if (0 == iNextMbXyIndex) { // slice starts at MB 0: record SPS/PPS ids and quality id on the output picture
+    pCurLayer->pDec->iSpsId = pCtx->pSps->iSpsId;
+    pCurLayer->pDec->iPpsId = pCtx->pPps->iPpsId;
+
+    pCurLayer->pDec->uiQualityId = pCurLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
+  }
+
+  do {
+    if (iCountNumMb >= iTotalNumMb) { // all MBs of the slice handled (also exits immediately when iTotalNumMb <= 0)
+      break;
+    }
+
+    if (!pCtx->pParam->bParseOnly) { //for parse only, actual recon MB unnecessary
+      if (WelsTargetMbConstruction (pCtx)) { // any non-zero result is treated as a reconstruction failure
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+                 "WelsTargetSliceConstruction():::MB(%d, %d) construction error. pCurSlice_type:%d",
+                 pCurLayer->iMbX, pCurLayer->iMbY, pCurSlice->eSliceType);
+
+        return ERR_INFO_MB_RECON_FAIL;
+      }
+    }
+
+    ++iCountNumMb;
+    if (!pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex]) { // MB not yet flagged as correctly decoded; original note: "already con-ed, overwrite" (presumably: a concealed MB is being overwritten -- confirm)
+      pCurLayer->pMbCorrectlyDecodedFlag[iNextMbXyIndex] = true;
+      pCtx->pDec->iMbEcedPropNum += (pCurLayer->pMbRefConcealedFlag[iNextMbXyIndex] ? 1 : 0); // count MBs whose pMbRefConcealedFlag is set
+      ++pCtx->iTotalNumMbRec; // each MB index is counted at most once thanks to the flag above
+    }
+
+    if (pCtx->iTotalNumMbRec > iTotalMbTargetLayer) { // more MBs counted than the SPS total: report and abort
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+               "WelsTargetSliceConstruction():::pCtx->iTotalNumMbRec:%d, iTotalMbTargetLayer:%d",
+               pCtx->iTotalNumMbRec, iTotalMbTargetLayer);
+
+      return ERR_INFO_MB_NUM_EXCEED_FAIL;
+    }
+
+    if (pSliceHeader->pPps->uiNumSliceGroups > 1) { // several slice groups: the next MB index comes from FmoNextMb
+      iNextMbXyIndex = FmoNextMb (pFmo, iNextMbXyIndex);
+    } else { // single slice group: MB indices are consecutive
+      ++iNextMbXyIndex;
+    }
+    if (-1 == iNextMbXyIndex || iNextMbXyIndex >= iTotalMbTargetLayer) { // slice group boundary or end of a frame
+      break;
+    }
+    pCurLayer->iMbX  = iNextMbXyIndex % pCurLayer->iMbWidth; // refresh MB coordinates for the next iteration
+    pCurLayer->iMbY  = iNextMbXyIndex / pCurLayer->iMbWidth;
+    pCurLayer->iMbXyIndex = iNextMbXyIndex;
+  } while (1);
+
+  pCtx->pDec->iWidthInPixel  = iCurLayerWidth;
+  pCtx->pDec->iHeightInPixel = iCurLayerHeight;
+
+  if ((pCurSlice->eSliceType != I_SLICE) && (pCurSlice->eSliceType != P_SLICE) && (pCurSlice->eSliceType != B_SLICE)) // only I, P and B slices continue to deblocking
+    return ERR_NONE; //no error but just ignore the type unsupported
+
+  if (pCtx->pParam->bParseOnly) //for parse only, deblocking should not go on
+    return ERR_NONE;
+
+  pDeblockMb = WelsDeblockingMb; // per-MB deblocking routine handed to WelsDeblockingFilterSlice below
+
+  if (1 == pSliceHeader->uiDisableDeblockingFilterIdc
+      || pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice <= 0) { // skip deblocking when the idc equals 1 or the slice has no MBs
+    return ERR_NONE;//NO_SUPPORTED_FILTER_IDX
+  } else {
+    WelsDeblockingFilterSlice (pCtx, pDeblockMb);
+  }
+  // any other filter_idc not supported here, 7/22/2010
+
+  return ERR_NONE;
+}
+
+int32_t WelsMbInterSampleConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer,
+                                       uint8_t* pDstY, uint8_t* pDstU, uint8_t* pDstV, int32_t iStrideL, int32_t iStrideC) { // applies pCtx's Idct*ResAddPred function pointers to the luma and chroma destinations of the current MB, using its pScaledTCoeff and pNzc; always returns ERR_NONE
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i, iIndex, iOffset;
+
+  if (pCurLayer->pTransformSize8x8Flag[iMbXy]) { // 8x8 transform: one call per 8x8 luma block
+    for (i = 0; i < 4; i++) {
+      iIndex = g_kuiMbCountScan4Idx[i << 2]; // pNzc index looked up for the first 4x4 entry of 8x8 block i
+      if (pCurLayer->pNzc[iMbXy][iIndex] || pCurLayer->pNzc[iMbXy][iIndex + 1] || pCurLayer->pNzc[iMbXy][iIndex + 4]
+          || pCurLayer->pNzc[iMbXy][iIndex + 5]) { // call only when at least one of the four nzc entries (iIndex, +1, +4, +5) is non-zero
+        iOffset = ((iIndex >> 2) << 2) * iStrideL + ((iIndex % 4) << 2); // (iIndex / 4) * 4 lines down, (iIndex % 4) * 4 samples across
+        pCtx->pIdctResAddPredFunc8x8 (pDstY + iOffset, iStrideL, pCurLayer->pScaledTCoeff[iMbXy] + (i << 6)); // 64 coefficients per 8x8 block
+      }
+    }
+  } else { // no 8x8 transform: four calls, one per 8x8 luma quadrant
+    // luma.
+    const int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+    int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
+    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 0, iStrideL, pScaledTCoeff + 0 * 64, pNzc +  0); // row 0, column 0
+    pCtx->pIdctFourResAddPredFunc (pDstY + 0 * iStrideL + 8, iStrideL, pScaledTCoeff + 1 * 64, pNzc +  2); // row 0, column 8
+    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 0, iStrideL, pScaledTCoeff + 2 * 64, pNzc +  8); // row 8, column 0
+    pCtx->pIdctFourResAddPredFunc (pDstY + 8 * iStrideL + 8, iStrideL, pScaledTCoeff + 3 * 64, pNzc + 10); // row 8, column 8
+  }
+
+  const int8_t* pNzc = pCurLayer->pNzc[iMbXy]; // chroma is processed the same way in both luma branches
+  int16_t* pScaledTCoeff = pCurLayer->pScaledTCoeff[iMbXy];
+  // Cb.
+  pCtx->pIdctFourResAddPredFunc (pDstU, iStrideC, pScaledTCoeff + 4 * 64, pNzc + 16); // coefficients start at 4 * 64, nzc at 16
+  // Cr.
+  pCtx->pIdctFourResAddPredFunc (pDstV, iStrideC, pScaledTCoeff + 5 * 64, pNzc + 18); // coefficients start at 5 * 64, nzc at 18
+
+  return ERR_NONE;
+}
+int32_t WelsMbInterConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) { // builds the inter prediction for the current MB (GetInterPred for P slices, GetInterBPred otherwise), then calls WelsMbInterSampleConstruction; always returns ERR_NONE
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  uint8_t*  pDstY, *pDstCb, *pDstCr;
+
+  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
+  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
+
+  pDstY  = pCurLayer->pDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); // 16 lines and 16 samples per MB in luma
+  pDstCb = pCurLayer->pDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); // 8 lines and 8 samples per MB in chroma
+  pDstCr = pCurLayer->pDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+
+  if (pCtx->eSliceType == P_SLICE) {
+    GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
+  } else { // every slice type other than P_SLICE takes the GetInterBPred path
+    if (pCtx->pTempDec == NULL) // temporary picture is allocated on first use and kept in the context
+      pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4); // NOTE(review): result is dereferenced below without a NULL check -- confirm AllocPicture cannot fail here
+    uint8_t*   pTempDstYCbCr[3];
+    uint8_t*   pDstYCbCr[3];
+    pTempDstYCbCr[0] = pCtx->pTempDec->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); // NOTE(review): offsets use pCtx->pDec strides; assumes pTempDec has the same linesizes -- confirm
+    pTempDstYCbCr[1] = pCtx->pTempDec->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
+    pTempDstYCbCr[2] = pCtx->pTempDec->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+    pDstYCbCr[0] = pDstY;
+    pDstYCbCr[1] = pDstCb;
+    pDstYCbCr[2] = pDstCr;
+    GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx); // receives both the output-picture and the temporary-picture plane pointers
+  }
+  WelsMbInterSampleConstruction (pCtx, pCurLayer, pDstY, pDstCb, pDstCr, iLumaStride, iChromaStride); // return value is not checked; the callee visible in this file always returns ERR_NONE
+
+  pCtx->sBlockFunc.pWelsSetNonZeroCountFunc (
+    pCurLayer->pNzc[pCurLayer->iMbXyIndex]); // set all non-zero nzc to 1 (original note adds: deblocking "can be opti", i.e. could be optimized)
+  return ERR_NONE;
+}
+
+// In-place transform + dequantization of 16 coefficients spread through pBlock: the value for (row r, column c)
+// lives at pBlock[kiYOffset[r] + kiXOffset[c]]. Two passes of 4-point butterflies are applied, then every result is
+// scaled as (x * kiQMul + 32) >> 6; kiQMul comes from pDequant_coeff4x4 when bUseScalingList is set.
+void WelsLumaDcDequantIdct (int16_t* pBlock, int32_t iQp, PWelsDecoderContext pCtx) {
+  const int32_t kiQMul = pCtx->bUseScalingList ? pCtx->pDequant_coeff4x4[0][iQp][0] : (g_kuiDequantCoeff[iQp][0] << 4);
+#define STRIDE 16
+  static const int32_t kiXOffset[4] = {0, STRIDE, STRIDE << 2,  5 * STRIDE};
+  static const int32_t kiYOffset[4] = {0, STRIDE << 1, STRIDE << 3, 10 * STRIDE};
+  int32_t iTemp[16]; // pass-1 output, four entries per row
+
+  // pass 1: butterfly across the four columns of each row
+  for (int32_t iRow = 0; iRow < 4; ++iRow) {
+    const int16_t* pSrc = pBlock + kiYOffset[iRow];
+    int32_t* pOut = iTemp + (iRow << 2);
+    const int32_t kiC0 = pSrc[0];
+    const int32_t kiC1 = pSrc[STRIDE];
+    const int32_t kiC2 = pSrc[kiXOffset[2]];
+    const int32_t kiC3 = pSrc[kiXOffset[3]];
+    const int32_t kiSum02 = kiC0 + kiC2;
+    const int32_t kiDif02 = kiC0 - kiC2;
+    const int32_t kiDif13 = kiC1 - kiC3;
+    const int32_t kiSum13 = kiC1 + kiC3;
+    pOut[0] = kiSum02 + kiSum13;
+    pOut[1] = kiDif02 + kiDif13;
+    pOut[2] = kiDif02 - kiDif13;
+    pOut[3] = kiSum02 - kiSum13;
+  }
+  // pass 2: butterfly down each column, then dequantize with rounding and store back into pBlock
+  for (int32_t iCol = 0; iCol < 4; ++iCol) {
+    int16_t* pDst = pBlock + kiXOffset[iCol];
+    const int32_t kiSum02 = iTemp[iCol] + iTemp[8 + iCol];
+    const int32_t kiDif02 = iTemp[iCol] - iTemp[8 + iCol];
+    const int32_t kiDif13 = iTemp[4 + iCol] - iTemp[12 + iCol];
+    const int32_t kiSum13 = iTemp[4 + iCol] + iTemp[12 + iCol];
+    pDst[0]            = ((kiSum02 + kiSum13) * kiQMul + (1 << 5)) >> 6;
+    pDst[kiYOffset[1]] = ((kiDif02 + kiDif13) * kiQMul + (1 << 5)) >> 6;
+    pDst[kiYOffset[2]] = ((kiDif02 - kiDif13) * kiQMul + (1 << 5)) >> 6;
+    pDst[kiYOffset[3]] = ((kiSum02 - kiSum13) * kiQMul + (1 << 5)) >> 6;
+  }
+#undef STRIDE
+}
+
+// Reconstructs an intra MB: calls WelsFillRecNeededMbInfo, then dispatches on the MB type to the I16x16 / I8x8 /
+// I4x4 routine. Any other type (IPCM should not enter this path) is left untouched. Always returns ERR_NONE.
+int32_t WelsMbIntraPredictionConstruction (PWelsDecoderContext pCtx, PDqLayer pCurLayer, bool bOutput) {
+  const int32_t kiMbIdx = pCurLayer->iMbXyIndex;
+
+  WelsFillRecNeededMbInfo (pCtx, bOutput, pCurLayer);
+
+  if (IS_INTRA16x16 (pCurLayer->pMbType[kiMbIdx]))
+    RecI16x16Mb (kiMbIdx, pCtx, pCurLayer->pScaledTCoeff[kiMbIdx], pCurLayer);
+  else if (IS_INTRA8x8 (pCurLayer->pMbType[kiMbIdx]))
+    RecI8x8Mb (kiMbIdx, pCtx, pCurLayer->pScaledTCoeff[kiMbIdx], pCurLayer);
+  else if (IS_INTRA4x4 (pCurLayer->pMbType[kiMbIdx]))
+    RecI4x4Mb (kiMbIdx, pCtx, pCurLayer->pScaledTCoeff[kiMbIdx], pCurLayer);
+  return ERR_NONE;
+}
+
+// Writes the motion-compensated prediction of the current inter MB into the current picture, with no residual
+// added. P slices use GetInterPred; any other slice type uses GetInterBPred with a lazily allocated scratch picture.
+// Returns ERR_NONE, or ERR_INFO_MB_RECON_FAIL when that scratch picture cannot be allocated.
+int32_t WelsMbInterPrediction (PWelsDecoderContext pCtx, PDqLayer pCurLayer) {
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  int32_t iLumaStride   = pCtx->pDec->iLinesize[0];
+  int32_t iChromaStride = pCtx->pDec->iLinesize[1];
+  int32_t iLumaOffset   = (iMbY * iLumaStride + iMbX) << 4;   // MB units scaled by 16 for luma
+  int32_t iChromaOffset = (iMbY * iChromaStride + iMbX) << 3; // MB units scaled by 8 for chroma
+  uint8_t* pDstY  = pCurLayer->pDec->pData[0] + iLumaOffset;
+  uint8_t* pDstCb = pCurLayer->pDec->pData[1] + iChromaOffset;
+  uint8_t* pDstCr = pCurLayer->pDec->pData[2] + iChromaOffset;
+
+  if (pCtx->eSliceType == P_SLICE) {
+    GetInterPred (pDstY, pDstCb, pDstCr, pCtx);
+  } else {
+    if (pCtx->pTempDec == NULL) { // scratch picture is created lazily on the first non-P inter MB
+      pCtx->pTempDec = AllocPicture (pCtx, pCtx->pSps->iMbWidth << 4, pCtx->pSps->iMbHeight << 4);
+      if (pCtx->pTempDec == NULL)
+        return ERR_INFO_MB_RECON_FAIL; // allocation failed: bail out instead of dereferencing NULL below
+    }
+    uint8_t* pDstYCbCr[3] = { pDstY, pDstCb, pDstCr };
+    uint8_t* pTempDstYCbCr[3] = { pCtx->pTempDec->pData[0] + iLumaOffset, pCtx->pTempDec->pData[1] + iChromaOffset,
+                                  pCtx->pTempDec->pData[2] + iChromaOffset };
+    GetInterBPred (pDstYCbCr, pTempDstYCbCr, pCtx);
+  }
+  return ERR_NONE;
+}
+
+// Reconstructs the current MB of pCtx->pCurDqLayer according to its type and returns the status of the routine it
+// dispatched to: I_PCM was already reconstructed while parsing, other intra types use intra prediction, and inter
+// types use prediction only (cbp == 0, includes SKIP) or prediction plus residual. Unknown types are logged and
+// reported as ERR_INFO_MB_RECON_FAIL.
+int32_t WelsTargetMbConstruction (PWelsDecoderContext pCtx) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  const int32_t kiMbXy = pCurLayer->iMbXyIndex;
+
+  if (MB_TYPE_INTRA_PCM == pCurLayer->pMbType[kiMbXy])
+    return ERR_NONE; //already decoded and reconstructed when parsing
+  if (IS_INTRA (pCurLayer->pMbType[kiMbXy]))
+    return WelsMbIntraPredictionConstruction (pCtx, pCurLayer, 1);
+  if (IS_INTER (pCurLayer->pMbType[kiMbXy])) { //InterMB
+    if (0 == pCurLayer->pCbp[kiMbXy]) //uiCbp==0 include SKIP
+      return WelsMbInterPrediction (pCtx, pCurLayer);
+    return WelsMbInterConstruction (pCtx, pCurLayer); // propagate the status instead of discarding it
+  }
+  WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsTargetMbConstruction():::::Unknown MB type: %d",
+           pCurLayer->pMbType[kiMbXy]);
+  return ERR_INFO_MB_RECON_FAIL;
+}
+
+// In-place 2x2 butterfly over the four values stored at pBlock[0], pBlock[16], pBlock[32] and pBlock[48]
+// (a, b, c, d): they become a+b+c+d, a-b+c-d, a+b-c-d and a-b-c+d respectively. No scaling is applied here.
+void WelsChromaDcIdct (int16_t* pBlock) {
+  const int32_t kiColStep = 16;
+  const int32_t kiRowStep = 32;
+  int16_t* pTop = pBlock;
+  int16_t* pBottom = pBlock + kiRowStep;
+  const int32_t kiA = pTop[0];
+  const int32_t kiB = pTop[kiColStep];
+  const int32_t kiC = pBottom[0];
+  const int32_t kiD = pBottom[kiColStep];
+
+  const int32_t kiTopSum = kiA + kiB;
+  const int32_t kiTopDiff = kiA - kiB;
+  const int32_t kiBottomSum = kiC + kiD;
+  const int32_t kiBottomDiff = kiC - kiD;
+
+  pTop[0] = kiTopSum + kiBottomSum;
+  pTop[kiColStep] = kiTopDiff + kiBottomDiff;
+  pBottom[0] = kiTopSum - kiBottomSum;
+  pBottom[kiColStep] = kiTopDiff - kiBottomDiff;
+}
+
+// Marks which neighbouring samples may be used for intra NxN prediction. In pSampleAvail entry 0 is top-left,
+// 1..4 the top row, 5 top-right and 6/12/18/24 the left column. Entries are only ever set to 1 here; the entries
+// of unavailable neighbours keep whatever value the caller stored.
+void WelsMapNxNNeighToSampleNormal (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  if (pNeighAvail->iLeftAvail) { //left
+    for (int32_t iIdx = 6; iIdx <= 24; iIdx += 6)
+      pSampleAvail[iIdx] = 1;
+  }
+
+  if (pNeighAvail->iLeftTopAvail) //top_left
+    pSampleAvail[0] = 1;
+
+  if (pNeighAvail->iTopAvail) { //top
+    for (int32_t iIdx = 1; iIdx <= 4; ++iIdx)
+      pSampleAvail[iIdx] = 1;
+  }
+
+  if (pNeighAvail->iRightTopAvail) //top_right
+    pSampleAvail[5] = 1;
+}
+
+// Constrained variant of the NxN neighbour mapping: a neighbour only counts when it is available AND IS_INTRA holds
+// for its MB type. Entry 0 is top-left, 1..4 top, 5 top-right, 6/12/18/24 left; entries are only ever set to 1.
+void WelsMapNxNNeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, int32_t* pSampleAvail) {
+  const bool kbLeft     = pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType);
+  const bool kbTopLeft  = pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType);
+  const bool kbTop      = pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType);
+  const bool kbTopRight = pNeighAvail->iRightTopAvail && IS_INTRA (pNeighAvail->iRightTopType);
+  if (kbLeft) { //left
+    for (int32_t iIdx = 6; iIdx <= 24; iIdx += 6)
+      pSampleAvail[iIdx] = 1;
+  }
+  if (kbTopLeft) //top_left
+    pSampleAvail[0] = 1;
+  if (kbTop) { //top
+    for (int32_t iIdx = 1; iIdx <= 4; ++iIdx)
+      pSampleAvail[iIdx] = 1;
+  }
+  if (kbTopRight) //top_right
+    pSampleAvail[5] = 1;
+}
+// Packs 16x16 neighbour availability into *pSampleAvail: bit 2 = left, bit 1 = top-left, bit 0 = top.
+// Left stores the byte outright while the others OR into it, so the result is only meaningful when the caller
+// zeroes *pSampleAvail first.
+void WelsMap16x16NeighToSampleNormal (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  const uint8_t kuiLeftBit = 1 << 2, kuiTopLeftBit = 1 << 1, kuiTopBit = 1;
+  if (pNeighAvail->iLeftAvail)
+    *pSampleAvail = kuiLeftBit;
+  if (pNeighAvail->iLeftTopAvail)
+    *pSampleAvail |= kuiTopLeftBit;
+  if (pNeighAvail->iTopAvail) *pSampleAvail |= kuiTopBit;
+}
+
+// Constrained variant of the 16x16 mapping: a neighbour counts only when it is available AND IS_INTRA holds for
+// its MB type. Bit 2 = left (plain store), bit 1 = top-left and bit 0 = top (ORed in).
+void WelsMap16x16NeighToSampleConstrain1 (PWelsNeighAvail pNeighAvail, uint8_t* pSampleAvail) {
+  const uint8_t kuiLeftBit = 1 << 2, kuiTopLeftBit = 1 << 1, kuiTopBit = 1;
+  if (pNeighAvail->iLeftAvail && IS_INTRA (pNeighAvail->iLeftType))
+    *pSampleAvail = kuiLeftBit;
+  if (pNeighAvail->iLeftTopAvail && IS_INTRA (pNeighAvail->iLeftTopType))
+    *pSampleAvail |= kuiTopLeftBit;
+  if (pNeighAvail->iTopAvail && IS_INTRA (pNeighAvail->iTopType))
+    *pSampleAvail |= kuiTopBit;
+}
+
+int32_t ParseIntra4x4Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
+                           PBitStringAux pBs,
+                           PDqLayer pCurDqLayer) { // Parses the 16 Intra4x4 luma modes + the chroma mode of the current MB
+  int32_t iSampleAvail[5 * 6] = { 0 }; //neighbour-sample availability cache, initialize as 0
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int32_t iFinalMode, i;
+  // Returns ERR_NONE, or an ERR_LEVEL_MB_DATA error code when a luma or chroma prediction mode is invalid.
+  uint8_t uiNeighAvail = 0; // bit 2: left, bit 1: top-left, bit 0: top (built below)
+  uint32_t uiCode;
+  int32_t iCode;
+  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
+  uiNeighAvail = (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
+  for (i = 0; i < 16; i++) { // one iteration per 4x4 luma block
+    int32_t iPrevIntra4x4PredMode = 0;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode)); // CABAC: -1 is treated below as "use predicted mode"
+      iPrevIntra4x4PredMode = iCode;
+    } else {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); // non-CABAC: 1-bit flag, non-zero means "use predicted mode"
+      iPrevIntra4x4PredMode = uiCode;
+    }
+    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i);
+    // Either take the predicted mode, or the coded value bumped by one when it is >= the predicted mode.
+    int8_t iBestMode;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      if (iPrevIntra4x4PredMode == -1)
+        iBestMode = kiPredMode;
+      else
+        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
+    } else {
+      if (iPrevIntra4x4PredMode) {
+        iBestMode = kiPredMode;
+      } else {
+        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode)); // 3-bit coded mode
+        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
+      }
+    }
+
+    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i, false);
+    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
+    }
+
+    pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[i]] = iFinalMode;
+
+    pIntraPredMode[g_kuiScan8[i]] = iBestMode;
+    // block i is now decoded, so mark it available for the blocks that follow
+    iSampleAvail[g_kuiCache30ScanIdx[i]] = 1;
+  }
+  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
+  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
+  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
+  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
+  // NOTE(review): the seven entries saved above look like the bottom row and right column of an 8-wide cache - confirm
+  if (pCtx->pSps->uiChromaFormatIdc == 0)//no need parse chroma
+    return ERR_NONE;
+
+  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
+  } else {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
+  }
+  // final validation of the stored chroma mode against neighbour availability
+  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
+      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseIntra8x8Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t* pIntraPredMode,
+                           PBitStringAux pBs,
+                           PDqLayer pCurDqLayer) {
+  // Parses the four Intra8x8 luma modes + the chroma mode of the current MB. Similar to Intra_4x4; could be merged.
+  int32_t iSampleAvail[5 * 6] = { 0 }; //neighbour-sample availability cache, initialize as 0
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int32_t iFinalMode, i;
+  // Returns ERR_NONE, or an ERR_LEVEL_MB_DATA error code when a luma or chroma prediction mode is invalid.
+  uint8_t uiNeighAvail = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  pCtx->pMapNxNNeighToSampleFunc (pNeighAvail, iSampleAvail);
+  // Top-Right : Left : Top-Left : Top
+  uiNeighAvail = (iSampleAvail[5] << 3) | (iSampleAvail[6] << 2) | (iSampleAvail[0] << 1) | (iSampleAvail[1]);
+  // the availability mask is also stored per MB
+  pCurDqLayer->pIntraNxNAvailFlag[iMbXy] = uiNeighAvail;
+
+  for (i = 0; i < 4; i++) { // one iteration per 8x8 luma block; i << 2 is the index of its first 4x4 block
+    int32_t iPrevIntra4x4PredMode = 0;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      WELS_READ_VERIFY (ParseIntraPredModeLumaCabac (pCtx, iCode)); // CABAC: -1 is treated below as "use predicted mode"
+      iPrevIntra4x4PredMode = iCode;
+    } else {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); // non-CABAC: 1-bit flag, non-zero means "use predicted mode"
+      iPrevIntra4x4PredMode = uiCode;
+    }
+    const int32_t kiPredMode = PredIntra4x4Mode (pIntraPredMode, i << 2);
+    // Either take the predicted mode, or the coded value bumped by one when it is >= the predicted mode.
+    int8_t iBestMode;
+    if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+      if (iPrevIntra4x4PredMode == -1)
+        iBestMode = kiPredMode;
+      else
+        iBestMode = iPrevIntra4x4PredMode + (iPrevIntra4x4PredMode >= kiPredMode);
+    } else {
+      if (iPrevIntra4x4PredMode) {
+        iBestMode = kiPredMode;
+      } else {
+        WELS_READ_VERIFY (BsGetBits (pBs, 3, &uiCode)); // 3-bit coded mode
+        iBestMode = uiCode + ((int32_t) uiCode >= kiPredMode);
+      }
+    }
+
+    iFinalMode = CheckIntraNxNPredMode (&iSampleAvail[0], &iBestMode, i << 2, true);
+
+    if (iFinalMode == GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INVALID_INTRA4X4_MODE)) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I4x4_PRED_MODE);
+    }
+    // replicate the 8x8 result over its four 4x4 blocks and mark them available for the blocks that follow
+    for (int j = 0; j < 4; j++) {
+      pCurDqLayer->pIntra4x4FinalMode[iMbXy][g_kuiScan4[ (i << 2) + j]] = iFinalMode;
+      pIntraPredMode[g_kuiScan8[ (i << 2) + j]] = iBestMode;
+      iSampleAvail[g_kuiCache30ScanIdx[ (i << 2) + j]] = 1;
+    }
+  }
+  ST32 (&pCurDqLayer->pIntraPredMode[iMbXy][0], LD32 (&pIntraPredMode[1 + 8 * 4]));
+  pCurDqLayer->pIntraPredMode[iMbXy][4] = pIntraPredMode[4 + 8 * 1];
+  pCurDqLayer->pIntraPredMode[iMbXy][5] = pIntraPredMode[4 + 8 * 2];
+  pCurDqLayer->pIntraPredMode[iMbXy][6] = pIntraPredMode[4 + 8 * 3];
+  // NOTE(review): the seven entries saved above look like the bottom row and right column of an 8-wide cache - confirm
+  if (pCtx->pSps->uiChromaFormatIdc == 0)
+    return ERR_NONE;
+
+  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiNeighAvail, iCode));
+    if (iCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = iCode;
+  } else {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //intra_chroma_pred_mode
+    if (uiCode > MAX_PRED_MODE_ID_CHROMA) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    }
+    pCurDqLayer->pChromaPredMode[iMbXy] = uiCode;
+  }
+  // final validation of the stored chroma mode against neighbour availability
+  if (-1 == pCurDqLayer->pChromaPredMode[iMbXy]
+      || CheckIntraChromaPredMode (uiNeighAvail, &pCurDqLayer->pChromaPredMode[iMbXy])) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+  }
+
+  return ERR_NONE;
+}
+
+// Validates the Intra16x16 luma prediction mode already stored in pIntraPredMode[iMbXy][7] against neighbour
+// availability, then (unless uiChromaFormatIdc is 0) parses intra_chroma_pred_mode with CABAC or ue(v), range-checks
+// it, stores it in pChromaPredMode[iMbXy] and validates it. Returns ERR_NONE or an ERR_LEVEL_MB_DATA error code.
+int32_t ParseIntra16x16Mode (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, PBitStringAux pBs,
+                             PDqLayer pCurDqLayer) {
+  const int32_t kiMbIdx = pCurDqLayer->iMbXyIndex;
+  uint8_t uiAvailMask = 0; //0x07 = 0 1 1 1, means left, top-left, top avail or not. (1: avail, 0: unavail)
+  pCtx->pMap16x16NeighToSampleFunc (pNeighAvail, &uiAvailMask);
+
+  //invalid iPredMode, must stop decoding
+  if (CheckIntra16x16PredMode (uiAvailMask, &pCurDqLayer->pIntraPredMode[kiMbIdx][7]))
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I16x16_PRED_MODE);
+  if (pCtx->pSps->uiChromaFormatIdc == 0)
+    return ERR_NONE; //no need parse chroma
+
+  if (pCurDqLayer->sLayerInfo.pPps->bEntropyCodingModeFlag) {
+    int32_t iChromaMode;
+    WELS_READ_VERIFY (ParseIntraPredModeChromaCabac (pCtx, uiAvailMask, iChromaMode));
+    if (iChromaMode > MAX_PRED_MODE_ID_CHROMA)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    pCurDqLayer->pChromaPredMode[kiMbIdx] = iChromaMode;
+  } else {
+    uint32_t uiChromaMode;
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiChromaMode)); //intra_chroma_pred_mode
+    if (uiChromaMode > MAX_PRED_MODE_ID_CHROMA)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+    pCurDqLayer->pChromaPredMode[kiMbIdx] = uiChromaMode;
+  }
+
+  // final validation of the stored chroma mode against neighbour availability
+  if (-1 == pCurDqLayer->pChromaPredMode[kiMbIdx]
+      || CheckIntraChromaPredMode (uiAvailMask, &pCurDqLayer->pChromaPredMode[kiMbIdx]))
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_I_CHROMA_PRED_MODE);
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacISliceBaseMode0 (PWelsDecoderContext pCtx, uint32_t& uiEosFlag) { // Parses one MB of a CABAC I slice
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+  SWelsNeighAvail sNeighAvail;
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+  // Order: mb_type, prediction modes, cbp, delta QP, residual, end-of-slice flag (written to uiEosFlag).
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); // nzc cache; this MB's counts are copied out of it below
+  // Returns ERR_NONE or an ERR_LEVEL_MB_DATA error code. Per-MB flags are reset before parsing starts.
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  WELS_READ_VERIFY (ParseMBTypeISliceCabac (pCtx, &sNeighAvail, uiMbType));
+  if (uiMbType > 25) { // handled below: 0 = I4x4 (or I8x8), 25 = I_PCM, everything in between = I16x16
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+  } else if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17
+             && uiMbType <= 24))) { // NOTE(review): presumably the I16x16 types with chroma cbp != 0 - confirm
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+  } else if (25 == uiMbType) {   //I_PCM
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
+    WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+    pSlice->iLastDeltaQp = 0;
+    WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+    if (uiEosFlag) {
+      RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+    }
+    return ERR_NONE;
+  } else if (0 == uiMbType) { //I4x4
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+    if (pCtx->pPps->bTransform8x8ModeFlag) {
+      // transform_size_8x8_flag decides between I4x4 and I8x8 (see the branch right below)
+      WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, &sNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+    }
+    if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+      uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+    } else {
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+    }
+    //get uiCbp for I4x4
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, &sNeighAvail, uiCbp));
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp; // delta QP is only parsed below when cbp != 0
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? uiCbp >> 4 : 0;
+    uiCbpLuma = uiCbp & 15;
+  } else { //I16x16;
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // the 16x16 pred mode is carried by mb_type itself
+    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // ... and so is the cbp (no separate cbp syntax)
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBsAux, pCurLayer));
+  }
+  // Clear this MB's nzc (24 bytes) and DC flag; they are refilled below when residual is present.
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  pCurLayer->pCbfDc[iMbXy] = 0;
+  // I4x4/I8x8 without residual: no delta QP is parsed, the QP of the previous MB is reused.
+  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
+    }
+  }
+  // Residual path: taken when cbp != 0, and always for I16x16 (its luma DC block is parsed unconditionally).
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+    int32_t iQpDelta, iId8x8, iId4x4;
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) {//out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 ((pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i]), 0, 51)];
+    }
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC (the scan starts at index >= 1; 16 coefficients are reserved per 4x4 block)
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, i,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (i << 4), pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the 4 luma nzc rows out of the cache
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { //pNonZeroCount = 0
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        // Transform 8x8 support for CABAC: one residual block of 64 coefficients per coded 8x8 quadrant
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (&sNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart, LUMA_DC_AC_INTRA_8,
+                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+          } else { // quadrant not coded: zero its four nzc cache entries (two 16-bit stores)
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            int32_t iIdx = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                                g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                                pCurLayer->pLumaQp[iMbXy], pCtx));
+              iIdx++;
+            }
+          } else { // quadrant not coded: zero its four nzc cache entries (two 16-bit stores)
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+    int32_t iMbResProperty;
+    //chroma: coefficients start at offset 256 of pScaledTCoeff, 64 per plane
+    //step1: DC (present when the chroma cbp is 1 or 2)
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      //Cb Cr
+      for (i = 0; i < 2; i++) {
+        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+      }
+    }
+
+    //step2: AC (present only when the chroma cbp is 2)
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t iIdx = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (&sNeighAvail, pNonZeroCount, pBsAux, iIdx,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty,
+                            pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+          iIdx++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1])); // copy the chroma nzc pairs out of the cache
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST16 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][20], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][18], 0);
+      ST16 (&pCurLayer->pNzc[iMbXy][22], 0);
+    }
+  } else { // no residual at all: all 24 nzc bytes stay zero
+    ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+    ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+  }
+  // The end-of-slice flag follows every MB; when set, RestoreCabacDecEngineToBS is called with the bit-string reader.
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) { // pNalCur is unused
+  WELS_READ_VERIFY (WelsDecodeMbCabacISliceBaseMode0 (pCtx, uiEosFlag)); // thin wrapper around the base-mode decoder
+  return ERR_NONE;
+}
+
+int32_t WelsDecodeMbCabacPSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux           = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t iMbResProperty;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+
+  WELS_READ_VERIFY (ParseMBTypePSliceCabac (pCtx, pNeighAvail, uiMbType));
+  // uiMbType = 4 is not allowed.
+  if (uiMbType < 4) { //Inter mode
+    int16_t pMotionVector[LIST_A][30][MV_A];
+    int16_t pMvdCache[LIST_A][30][MV_A];
+    int8_t  pRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
+    WELS_READ_VERIFY (ParseInterPMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex));
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  } else { //Intra mode
+    uiMbType -= 5;
+    if (uiMbType > 25)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+    if (25 == uiMbType) {   //I_PCM
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
+      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+      pSlice->iLastDeltaQp = 0;
+      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+      if (uiEosFlag) {
+        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+      }
+      return ERR_NONE;
+    } else { //normal Intra mode
+      if (0 == uiMbType) { //Intra4x4
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        if (pCtx->pPps->bTransform8x8ModeFlag) {
+          WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+        }
+        if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+          uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        } else {
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        }
+      } else { //Intra16x16
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+        uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+      }
+    }
+  }
+
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0 ;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+
+    if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+      // Need modification when B picutre add in
+      bool bNeedParseTransformSize8x8Flag =
+        (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
+          || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+         && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
+         && (pCtx->pPps->bTransform8x8ModeFlag));
+
+      if (bNeedParseTransformSize8x8Flag) {
+        WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
+                          pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
+      }
+    }
+
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                            pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+        // Transform 8x8 support for CABAC
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
+                              IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
+                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+          } else {
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            int32_t iIdx = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                                g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                                pCurLayer->pLumaQp[iMbXy],
+                                pCtx));
+              iIdx++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+        int32_t index = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                            iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+          index++;
+        }
+      }
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+    }
+  } else {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode one non-skipped macroblock of a CABAC coded B slice.
+ *
+ * Parses the macroblock type, then either the inter-B motion information or the
+ * intra prediction modes, followed by the coded block pattern, the QP delta,
+ * the luma and chroma residual blocks and finally the end-of-slice flag.
+ *
+ * \param   pCtx         decoder context; layer, slice and bit stream are taken from pCtx->pCurDqLayer
+ * \param   pNeighAvail  neighbour availability information of the current macroblock
+ * \param   uiEosFlag    handed to ParseEndOfSliceCabac(); when it is non-zero afterwards the CABAC
+ *                       engine state is restored to the bit stream
+ * \return  ERR_NONE on success; an ERR_LEVEL_MB_DATA error for an invalid macroblock type or an
+ *          out-of-range QP delta; otherwise whatever WELS_READ_VERIFY returns for a failed parse step
+ */
+int32_t WelsDecodeMbCabacBSliceBaseMode0 (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBsAux = pCurLayer->pBitStringAux;
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int32_t iMbResProperty;
+  int32_t i;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpLuma = 0, uiCbpChroma = 0;
+
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+
+  WELS_READ_VERIFY (ParseMBTypeBSliceCabac (pCtx, pNeighAvail, uiMbType));
+
+  if (uiMbType < 23) { //Inter B mode: values below 23 index g_ksInterBMbTypeInfo
+    int16_t pMotionVector[LIST_A][30][MV_A];
+    int16_t pMvdCache[LIST_A][30][MV_A];
+    int8_t  pRefIndex[LIST_A][30];
+    int8_t  pDirect[30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterBMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInterCabac (pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex, pCurLayer);
+    WelsFillDirectCacheCabac (pNeighAvail, pDirect, pCurLayer);
+    WELS_READ_VERIFY (ParseInterBMotionInfoCabac (pCtx, pNeighAvail, pNonZeroCount, pMotionVector, pMvdCache, pRefIndex,
+                      pDirect));
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  } else { //Intra mode: rebase so that the value matches the I-slice numbering (0..25)
+    uiMbType -= 23;
+    if (uiMbType > 25)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+    //types 5..12 and 17..24 are rejected when uiChromaFormatIdc is 0
+    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24)))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+    if (25 == uiMbType) {   //I_PCM
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in B slice!");
+      WELS_READ_VERIFY (ParseIPCMInfoCabac (pCtx));
+      pSlice->iLastDeltaQp = 0;
+      WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+      if (uiEosFlag) {
+        RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+      }
+      return ERR_NONE;
+    } else { //normal Intra mode
+      if (0 == uiMbType) { //Intra4x4, promoted to Intra8x8 when the 8x8 transform flag is set
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        if (pCtx->pPps->bTransform8x8ModeFlag) {
+          WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail, pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]));
+        }
+        if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+          uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        } else {
+          pCtx->pFillInfoCacheIntraNxNFunc (pNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, pNeighAvail, pIntraPredMode, pBsAux, pCurLayer));
+        }
+      } else { //Intra16x16: prediction mode and CBP are both derived from the macroblock type
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3;
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2];
+        uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+        uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
+        WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, pNeighAvail, pBsAux, pCurLayer));
+      }
+    }
+  }
+
+  //clear the stored non-zero counts of this macroblock (offsets 0..23) before residual parsing
+  ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+  ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+
+  //Intra16x16 already has its CBP from the macroblock type; every other type parses it here
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+    WELS_READ_VERIFY (ParseCbpInfoCabac (pCtx, pNeighAvail, uiCbp));
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    pSlice->iLastDeltaQp = uiCbp == 0 ? 0 : pSlice->iLastDeltaQp;
+    uiCbpChroma = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+    uiCbpLuma = pCurLayer->pCbp[iMbXy] & 15;
+  }
+
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+
+    if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+      // transform_size_8x8_flag is parsed for 16x16 / direct / 16x8 / 8x16 macroblocks, or when no
+      // sub-partition is smaller than 8x8; never for Intra4x4/Intra8x8 (handled above), and only
+      // with coded luma and the 8x8 transform enabled in the PPS
+      bool bNeedParseTransformSize8x8Flag =
+        (((IS_INTER_16x16 (pCurLayer->pMbType[iMbXy]) || IS_DIRECT (pCurLayer->pMbType[iMbXy])
+           || IS_INTER_16x8 (pCurLayer->pMbType[iMbXy]) || IS_INTER_8x16 (pCurLayer->pMbType[iMbXy]))
+          || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+         && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+         && ((pCurLayer->pCbp[iMbXy] & 0x0F) > 0)
+         && (pCtx->pPps->bTransform8x8ModeFlag));
+
+      if (bNeedParseTransformSize8x8Flag) {
+        WELS_READ_VERIFY (ParseTransformSize8x8FlagCabac (pCtx, pNeighAvail,
+                          pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy])); //transform_size_8x8_flag
+      }
+    }
+
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (ParseDeltaQpCabac (pCtx, iQpDelta));
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 0, 16, g_kuiLumaDcZigzagScan,
+                        I16_LUMA_DC, pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx));
+      //step2: Luma AC
+      if (uiCbpLuma) {
+        for (i = 0; i < 16; i++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, i, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                            1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                            pCurLayer->pLumaQp[iMbXy], pCtx));
+        }
+        //copy the luma rows of the local cache back into the per-MB storage
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][8], 0);
+        ST32 (&pCurLayer->pNzc[iMbXy][12], 0);
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCtx->pCurDqLayer->pTransformSize8x8Flag[iMbXy]) {
+        // Transform 8x8 support for CABAC
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            WELS_READ_VERIFY (ParseResidualBlockCabac8x8 (pNeighAvail, pNonZeroCount, pBsAux, (iId8x8 << 2),
+                              iScanIdxEnd - iScanIdxStart + 1, g_kuiZigzagScan8x8 + iScanIdxStart,
+                              IS_INTRA (pCurLayer->pMbType[iMbXy]) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8,
+                              pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), pCurLayer->pLumaQp[iMbXy], pCtx));
+          } else {
+            //8x8 block not coded: zero its four cache entries
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2)]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpLuma & (1 << iId8x8)) {
+            int32_t iIdx = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, iIdx, iScanIdxEnd - iScanIdxStart + 1,
+                                g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIdx << 4),
+                                pCurLayer->pLumaQp[iMbXy],
+                                pCtx));
+              iIdx++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[iId8x8 << 2]], 0);
+            ST16 (&pNonZeroCount[g_kCacheNzcScanIdx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32 (&pCurLayer->pNzc[iMbXy][0], LD32 (&pNonZeroCount[1 + 8 * 1]));
+        ST32 (&pCurLayer->pNzc[iMbXy][4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32 (&pCurLayer->pNzc[iMbXy][8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32 (&pCurLayer->pNzc[iMbXy][12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpChroma || 2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, 16 + (i << 2), 4, g_kuiChromaDcScan,
+                          iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+      }
+    }
+    //step2: AC
+    if (2 == uiCbpChroma) {
+      for (i = 0; i < 2; i++) {
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+        int32_t index = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          WELS_READ_VERIFY (ParseResidualBlockCabac (pNeighAvail, pNonZeroCount, pBsAux, index,
+                            iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1),
+                            iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (index << 4), pCurLayer->pChromaQp[iMbXy][i], pCtx));
+          index++;
+        }
+      }
+      //copy the chroma cache entries back into the per-MB storage
+      ST16 (&pCurLayer->pNzc[iMbXy][16], LD16 (&pNonZeroCount[6 + 8 * 1]));
+      ST16 (&pCurLayer->pNzc[iMbXy][20], LD16 (&pNonZeroCount[6 + 8 * 2]));
+      ST16 (&pCurLayer->pNzc[iMbXy][18], LD16 (&pNonZeroCount[6 + 8 * 4]));
+      ST16 (&pCurLayer->pNzc[iMbXy][22], LD16 (&pNonZeroCount[6 + 8 * 5]));
+    } else {
+      ST32 (&pCurLayer->pNzc[iMbXy][16], 0);
+      ST32 (&pCurLayer->pNzc[iMbXy][20], 0);
+    }
+  } else {
+    //nothing coded: no QP delta is parsed, the macroblock reuses the last QP of the slice
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+  }
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+  if (uiEosFlag) {
+    RestoreCabacDecEngineToBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux);
+  }
+
+  return ERR_NONE;
+}
+
+
+/*!
+ * \brief   Decode one macroblock of a CABAC coded P slice.
+ *
+ * Resets the per-macroblock defaults, parses the skip flag and then either fills in a
+ * skipped macroblock locally or hands over to WelsDecodeMbCabacPSliceBaseMode0().
+ *
+ * \param   pCtx       decoder context
+ * \param   pNalCur    not referenced by this function
+ * \param   uiEosFlag  handed to the end-of-slice parser
+ * \return  ERR_NONE on success, otherwise whatever WELS_READ_VERIFY returns for a failed step
+ */
+int32_t WelsDecodeMbCabacPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pLayer = pCtx->pCurDqLayer;
+  PSlice pCurSlice = &pLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSh = &pCurSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppList0 = pCtx->sRefPic.pRefList[LIST_0];
+  const int32_t kiMbIdx = pLayer->iMbXyIndex;
+  SWelsNeighAvail sNeighAvail;
+  uint32_t uiSkipFlag;
+  int32_t iIdx;
+
+  //per-macroblock defaults
+  pLayer->pCbp[kiMbIdx] = 0;
+  pLayer->pCbfDc[kiMbIdx] = 0;
+  pLayer->pChromaPredMode[kiMbIdx] = C_PRED_DC;
+  pLayer->pNoSubMbPartSizeLessThan8x8Flag[kiMbIdx] = true;
+  pLayer->pTransformSize8x8Flag[kiMbIdx] = false;
+
+  GetNeighborAvailMbType (&sNeighAvail, pLayer);
+  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &sNeighAvail, uiSkipFlag));
+
+  if (!uiSkipFlag) {
+    //regular macroblock: full syntax is parsed by the base-mode routine
+    WELS_READ_VERIFY (WelsDecodeMbCabacPSliceBaseMode0 (pCtx, &sNeighAvail, uiEosFlag));
+    return ERR_NONE;
+  }
+
+  //skipped macroblock: no residual, reference index 0, predicted motion vector
+  int16_t iSkipMv[2] = {0};
+  pLayer->pMbType[kiMbIdx] = MB_TYPE_SKIP;
+  for (iIdx = 0; iIdx < 24; iIdx += 4) {
+    ST32 (&pLayer->pNzc[kiMbIdx][iIdx], 0);
+  }
+
+  pLayer->pInterPredictionDoneFlag[kiMbIdx] = 0;
+  memset (pLayer->pRefIndex[0][kiMbIdx], 0, sizeof (int8_t) * 16);
+  //concealed when already flagged, or when the first list-0 picture is missing or incomplete
+  pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed
+                          || !ppList0[0] || !ppList0[0]->bIsComplete;
+
+  //predict mv and replicate it over all 16 entries, motion vector differences are zero
+  PredPSkipMvFromNeighbor (pLayer, iSkipMv);
+  const uint32_t kuiPackedMv = * (uint32_t*)iSkipMv;
+  for (iIdx = 0; iIdx < 16; iIdx++) {
+    ST32 (pLayer->pMv[0][kiMbIdx][iIdx], kuiPackedMv);
+    ST32 (pLayer->pMvd[0][kiMbIdx][iIdx], 0);
+  }
+
+  //no QP delta is parsed here: reuse the last QP of the slice
+  pLayer->pLumaQp[kiMbIdx] = pCurSlice->iLastMbQp;
+  for (iIdx = 0; iIdx < 2; iIdx++) {
+    pLayer->pChromaQp[kiMbIdx][iIdx] = g_kuiChromaQpTable[WELS_CLIP3 (pLayer->pLumaQp[kiMbIdx] +
+                                       pSh->pPps->iChromaQpIndexOffset[iIdx], 0, 51)];
+  }
+
+  //for neighboring CABAC usage
+  pCurSlice->iLastDeltaQp = 0;
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+
+  return ERR_NONE;
+}
+
+
+/*!
+ * \brief   Decode one macroblock of a CABAC coded B slice.
+ *
+ * Resets the per-macroblock defaults, parses the skip flag and then either derives the
+ * motion of a skipped macroblock through direct prediction (spatial or temporal, selected
+ * by the slice header) or hands over to WelsDecodeMbCabacBSliceBaseMode0().
+ *
+ * \param   pCtx       decoder context
+ * \param   pNalCur    not referenced by this function
+ * \param   uiEosFlag  handed to the end-of-slice parser
+ * \return  ERR_NONE on success, otherwise whatever WELS_READ_VERIFY returns for a failed step
+ */
+int32_t WelsDecodeMbCabacBSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pLayer = pCtx->pCurDqLayer;
+  PSlice pCurSlice = &pLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSh = &pCurSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppList0 = pCtx->sRefPic.pRefList[LIST_0];
+  PPicture* ppList1 = pCtx->sRefPic.pRefList[LIST_1];
+  const int32_t kiMbIdx = pLayer->iMbXyIndex;
+  SWelsNeighAvail sNeighAvail;
+  uint32_t uiSkipFlag;
+  int32_t iIdx;
+
+  //per-macroblock defaults
+  pLayer->pCbp[kiMbIdx] = 0;
+  pLayer->pCbfDc[kiMbIdx] = 0;
+  pLayer->pChromaPredMode[kiMbIdx] = C_PRED_DC;
+  pLayer->pNoSubMbPartSizeLessThan8x8Flag[kiMbIdx] = true;
+  pLayer->pTransformSize8x8Flag[kiMbIdx] = false;
+
+  GetNeighborAvailMbType (&sNeighAvail, pLayer);
+  WELS_READ_VERIFY (ParseSkipFlagCabac (pCtx, &sNeighAvail, uiSkipFlag));
+
+  memset (pLayer->pDirect[kiMbIdx], 0, sizeof (int8_t) * 16);
+
+  if (!uiSkipFlag) {
+    //regular macroblock: full syntax is parsed by the base-mode routine
+    WELS_READ_VERIFY (WelsDecodeMbCabacBSliceBaseMode0 (pCtx, &sNeighAvail, uiEosFlag));
+    return ERR_NONE;
+  }
+
+  //skipped macroblock: no residual, motion comes from direct prediction
+  int16_t iDirectMv[LIST_A][2] = { {0, 0}, {0, 0} };
+  int8_t  iDirectRef[LIST_A] = { 0 };
+  pLayer->pMbType[kiMbIdx] = MB_TYPE_SKIP | MB_TYPE_DIRECT;
+  for (iIdx = 0; iIdx < 24; iIdx += 4) {
+    ST32 (&pLayer->pNzc[kiMbIdx][iIdx], 0);
+  }
+
+  pLayer->pInterPredictionDoneFlag[kiMbIdx] = 0;
+  memset (pLayer->pRefIndex[LIST_0][kiMbIdx], 0, sizeof (int8_t) * 16);
+  memset (pLayer->pRefIndex[LIST_1][kiMbIdx], 0, sizeof (int8_t) * 16);
+  //concealed when already flagged, or when the first picture of either list is missing or incomplete
+  pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed
+                          || !ppList0[0] || !ppList0[0]->bIsComplete
+                          || !ppList1[0] || !ppList1[0]->bIsComplete;
+
+  if (!pSh->iDirectSpatialMvPredFlag) {
+    //temporal direct mode
+    ComputeColocated (pCtx);
+    PredBDirectTemporal (pCtx, iDirectMv, iDirectRef);
+  } else {
+    //predict direct spatial mv
+    PredMvBDirectSpatial (pCtx, iDirectMv, iDirectRef);
+  }
+
+  //no QP delta is parsed here: reuse the last QP of the slice
+  pLayer->pLumaQp[kiMbIdx] = pCurSlice->iLastMbQp;
+  for (iIdx = 0; iIdx < 2; iIdx++) {
+    pLayer->pChromaQp[kiMbIdx][iIdx] = g_kuiChromaQpTable[WELS_CLIP3 (pLayer->pLumaQp[kiMbIdx] +
+                                       pSh->pPps->iChromaQpIndexOffset[iIdx], 0, 51)];
+  }
+
+  //for neighboring CABAC usage
+  pCurSlice->iLastDeltaQp = 0;
+
+  WELS_READ_VERIFY (ParseEndOfSliceCabac (pCtx, uiEosFlag));
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Build the per-QP dequantization tables when a scaling matrix is signalled.
+ *
+ * When neither the SPS nor the PPS carries a scaling matrix, only bUseScalingList is cleared.
+ * Otherwise the 4x4 and 8x8 tables are (re)computed, unless they were already initialised for
+ * the PPS id currently in use. The PPS lists take precedence over the SPS lists.
+ *
+ * \param   pCtx  decoder context holding the active SPS/PPS and the table buffers
+ * \return  always ERR_NONE
+ */
+int32_t  WelsCalcDeqCoeffScalingList (PWelsDecoderContext pCtx) {
+  if (!pCtx->pSps->bSeqScalingMatrixPresentFlag && !pCtx->pPps->bPicScalingMatrixPresentFlag) {
+    pCtx->bUseScalingList = false;
+    return ERR_NONE;
+  }
+
+  pCtx->bUseScalingList = true;
+
+  //tables are cached per PPS id; nothing to do when they are still valid
+  if (pCtx->bDequantCoeff4x4Init && (pCtx->iDequantCoeffPpsid == pCtx->pPps->iPpsId)) {
+    return ERR_NONE;
+  }
+
+  // The luma QP of a macroblock is derived modulo 52, so QP 51 is a legal index and its row
+  // has to be filled as well (the former loop bound of 51 left it untouched).
+  // NOTE(review): assumes the pDequant_coeff_buffer* tables and g_kuiDequantCoeff provide
+  // 52 QP rows -- confirm against their declarations.
+  const int kiQpLevelNum = 52;
+  const bool kbUsePpsList = pCtx->pPps->bPicScalingMatrixPresentFlag;
+  int i, q, x, y;
+
+  //Init dequant coeff value for different QP
+  for (i = 0; i < 6; i++) {
+    pCtx->pDequant_coeff4x4[i] = pCtx->pDequant_coeff_buffer4x4[i];
+    pCtx->pDequant_coeff8x8[i] = pCtx->pDequant_coeff_buffer8x8[i];
+    for (q = 0; q < kiQpLevelNum; q++) {
+      for (x = 0; x < 16; x++) {
+        pCtx->pDequant_coeff4x4[i][q][x] = (kbUsePpsList ? pCtx->pPps->iScalingList4x4[i][x] :
+                                            pCtx->pSps->iScalingList4x4[i][x]) * g_kuiDequantCoeff[q][x & 0x07];
+      }
+      for (y = 0; y < 64; y++) {
+        pCtx->pDequant_coeff8x8[i][q][y] = (kbUsePpsList ? pCtx->pPps->iScalingList8x8[i][y] :
+                                            pCtx->pSps->iScalingList8x8[i][y]) * g_kuiMatrixV[q % 6][y / 8][y % 8];
+      }
+    }
+  }
+  pCtx->bDequantCoeff4x4Init = true;
+  pCtx->iDequantCoeffPpsid = pCtx->pPps->iPpsId;
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode all macroblocks of the current slice.
+ *
+ * Selects the macroblock parser from the entropy coding mode and the slice type, installs the
+ * intra neighbour helpers according to the constrained-intra flag, initialises the CABAC engine
+ * when needed, prepares the scaling-list tables and then walks the macroblocks of the slice
+ * until the end-of-slice flag, a slice group boundary or the end of the frame is reached.
+ *
+ * \param   pCtx                decoder context
+ * \param   bFirstSliceInLayer  not referenced by this function
+ * \param   pNalCur             forwarded unchanged to the macroblock parser
+ * \return  ERR_NONE on success, dsBitstreamError when inter-layer prediction flags are set
+ *          together with CABAC, otherwise the error of the failing macroblock/initialisation step
+ */
+int32_t WelsDecodeSlice (PWelsDecoderContext pCtx, bool bFirstSliceInLayer, PNalUnit pNalCur) {
+  PDqLayer pLayer = pCtx->pCurDqLayer;
+  PFmo pFmoCtx = pCtx->pFmo;
+  PSlice pCurSlice = &pLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeaderExt pShExt = &pCurSlice->sSliceHeaderExt;
+  PSliceHeader pSh = &pShExt->sSliceHeader;
+  const int32_t kiTotalMbNum = pSh->pSps->uiTotalMbCount; //need to be correct when fmo or multi slice
+  PWelsDecMbFunc pfDecodeMb;
+  uint32_t uiEndOfSlice = 0;
+  int32_t iMbIdx, iSliceId;
+  int32_t iCol, iRow;
+  int32_t iErr;
+
+  pCurSlice->iTotalMbInCurSlice = 0; //initialize at the starting of slice decoding.
+
+  //pick the macroblock parser
+  if (!pCtx->pPps->bEntropyCodingModeFlag) {
+    // NOTE(review): every non-P slice type, including B, ends up in the I-slice parser on this
+    // path -- confirm B slices without CABAC are rejected before this point.
+    if (P_SLICE == pSh->eSliceType) {
+      pfDecodeMb = WelsDecodeMbCavlcPSlice;
+    } else {
+      pfDecodeMb = WelsDecodeMbCavlcISlice;
+    }
+  } else {
+    if (pCurSlice->sSliceHeaderExt.bAdaptiveMotionPredFlag ||
+        pCurSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag ||
+        pCurSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+               "WelsDecodeSlice()::::ILP flag exist, not supported with CABAC enabled!");
+      pCtx->iErrorCode |= dsBitstreamError;
+      return dsBitstreamError;
+    }
+    if (P_SLICE == pSh->eSliceType) {
+      pfDecodeMb = WelsDecodeMbCabacPSlice;
+    } else if (B_SLICE == pSh->eSliceType) {
+      pfDecodeMb = WelsDecodeMbCabacBSlice;
+    } else { //every remaining slice type uses the I-slice parser
+      pfDecodeMb = WelsDecodeMbCabacISlice;
+    }
+  }
+
+  //intra neighbour helpers depend on constrained intra prediction
+  if (!pSh->pPps->bConstainedIntraPredFlag) {
+    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain0IntraNxN;
+    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleNormal;
+    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleNormal;
+  } else {
+    pCtx->pFillInfoCacheIntraNxNFunc = WelsFillCacheConstrain1IntraNxN;
+    pCtx->pMapNxNNeighToSampleFunc    = WelsMapNxNNeighToSampleConstrain1;
+    pCtx->pMap16x16NeighToSampleFunc  = WelsMap16x16NeighToSampleConstrain1;
+  }
+
+  pCtx->eSliceType = pSh->eSliceType;
+  if (pLayer->sLayerInfo.pPps->bEntropyCodingModeFlag == 1) {
+    //CABAC: set up the context models for this slice and attach the engine to the bit stream
+    const int32_t kiSliceQp = pSh->iSliceQp;
+    const int32_t kiCabacInitIdc = pSh->iCabacInitIdc;
+    WelsCabacContextInit (pCtx, pCurSlice->eSliceType, kiCabacInitIdc, kiSliceQp);
+    pCurSlice->iLastDeltaQp = 0;
+    WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCtx->pCabacDecEngine, pCtx->pCurDqLayer->pBitStringAux));
+  }
+  //try to calculate  the dequant_coeff
+  WelsCalcDeqCoeffScalingList (pCtx);
+
+  //position on the first macroblock of the slice
+  iMbIdx = pSh->iFirstMbInSlice;
+  iCol = iMbIdx % pLayer->iMbWidth;
+  iRow = iMbIdx / pLayer->iMbWidth;
+  pCurSlice->iMbSkipRun = -1;
+  iSliceId = (pSh->iFirstMbInSlice << 7) + pLayer->uiLayerDqId;
+
+  pLayer->iMbX = iCol;
+  pLayer->iMbY = iRow;
+  pLayer->iMbXyIndex = iMbIdx;
+
+  //stop at a slice group boundary (-1) or at the end of the frame
+  while ((-1 != iMbIdx) && (iMbIdx < kiTotalMbNum)) {
+    pLayer->pSliceIdc[iMbIdx] = iSliceId;
+    pCtx->bMbRefConcealed = false;
+    iErr = pfDecodeMb (pCtx, pNalCur, uiEndOfSlice);
+    //the concealment flag is recorded even when the macroblock failed to decode
+    pLayer->pMbRefConcealedFlag[iMbIdx] = pCtx->bMbRefConcealed;
+    if (iErr != ERR_NONE) {
+      return iErr;
+    }
+
+    ++pCurSlice->iTotalMbInCurSlice;
+    if (uiEndOfSlice) { //end of slice
+      break;
+    }
+
+    //advance: through the FMO map with several slice groups, otherwise in raster order
+    if (pSh->pPps->uiNumSliceGroups > 1) {
+      iMbIdx = FmoNextMb (pFmoCtx, iMbIdx);
+    } else {
+      ++iMbIdx;
+    }
+    iCol = iMbIdx % pLayer->iMbWidth;
+    iRow = iMbIdx / pLayer->iMbWidth;
+    pLayer->iMbX = iCol;
+    pLayer->iMbY = iRow;
+    pLayer->iMbXyIndex = iMbIdx;
+  }
+
+  return ERR_NONE;
+}
+
+int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx) { // parses one intra MB (mb_type, prediction modes, CBP, mb_qp_delta, CAVLC residual) of pCtx->pCurDqLayer; returns ERR_NONE or an error code
+  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBs              = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+
+  SWelsNeighAvail sNeighAvail;
+  int32_t iMbResProperty;
+  // coefficient scan range taken from the slice header extension
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+  // position and linear index of the MB being parsed; pNzc points at this MB's 24 stored non-zero counts
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t i;
+  int32_t iRet = ERR_NONE;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  // pNonZeroCount: 48-entry scratch cache of non-zero counts (presumably placed 16-byte aligned on the stack by the macro -- confirm)
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+  pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+  // per-MB defaults; the 8x8 transform flag may be switched on below in the mb_type 0 branch
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+  uiMbType = uiCode;
+  if (uiMbType > 25) // only mb_type values 0..25 are accepted here
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+  if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) // types 5..12 and 17..24 are rejected when uiChromaFormatIdc is 0
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+  if (25 == uiMbType) { // I_PCM: the MB's samples are copied straight from the bitstream
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in I slice!");
+    int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
+    int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+    // offsets of this MB's first sample in the luma (16x16 per MB) and chroma (8x8 per MB) planes
+    int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
+    int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
+
+    uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
+    uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
+    uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+
+    uint8_t* pTmpBsBuf;
+
+
+    int32_t i; // shadows the function-level i inside this branch
+    int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
+    int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
+    // bytes to step pCurBuf back, derived from iLeftBits -- NOTE(review): depends on the bit reader's read-ahead layout, confirm against the BsGet*/InitReadBits implementation
+    int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
+
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+    //step 1: locating bit-stream pointer [must align into integer byte]
+    pBs->pCurBuf -= iIndex;
+
+    //step 2: copy pixel from bit-stream into fdec [reconstruction]
+    pTmpBsBuf = pBs->pCurBuf;
+    if (!pCtx->pParam->bParseOnly) { // samples are written to the picture only when not in parse-only mode
+      for (i = 0; i < 16; i++) { //luma
+        memcpy (pDecY, pTmpBsBuf, iCopySizeY);
+        pDecY += iDecStrideL;
+        pTmpBsBuf += 16;
+      }
+      for (i = 0; i < 8; i++) { //cb
+        memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+        pDecU += iDecStrideC;
+        pTmpBsBuf += 8;
+      }
+      for (i = 0; i < 8; i++) { //cr
+        memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+        pDecV += iDecStrideC;
+        pTmpBsBuf += 8;
+      }
+    }
+    // step over the 384 sample bytes (16*16 luma + 8*8 Cb + 8*8 Cr) whether or not they were copied
+    pBs->pCurBuf += 384;
+
+    //step 3: update QP and pNonZeroCount
+    pCurLayer->pLumaQp[iMbXy] = 0;
+    memset (pCurLayer->pChromaQp[iMbXy], 0, sizeof (pCurLayer->pChromaQp[iMbXy]));
+    memset (pNzc, 16, sizeof (pCurLayer->pNzc[iMbXy]));   //Rec. 9.2.1 for PCM, nzc=16
+    WELS_READ_VERIFY (InitReadBits (pBs, 0)); // re-initialise bit reading after pCurBuf was moved by hand
+    return ERR_NONE;
+  } else if (0 == uiMbType) { //mb_type 0: intra NxN (4x4, or 8x8 when the transform flag is set); reference to JM
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+    if (pCtx->pPps->bTransform8x8ModeFlag) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+      }
+    }
+    if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) { // both branches fill the neighbour cache the same way; only the mode parser differs
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+    } else {
+      pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+      WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+    }
+
+    //uiCbp
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+    uiCbp = uiCode;
+    //G.9.1 Alternative parsing process for coded block pattern
+    if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+    if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+    // map the code number to the CBP value; a separate table is used when uiChromaFormatIdc is 0
+    if (pCtx->pSps->uiChromaFormatIdc)
+      uiCbp = g_kuiIntra4x4CbpTable[uiCbp];
+    else
+      uiCbp = g_kuiIntra4x4CbpTable400[uiCbp];
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpC = uiCbp >> 4; // chroma part: bits 4 and up
+    uiCbpL = uiCbp & 15; // luma part: one bit per 8x8 block
+  } else { //mb_type 1..24: intra 16x16 (I_PCM was handled above)
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+    pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+    pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+    pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // the 16x16 prediction mode is taken from mb_type
+    pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // and so is the CBP (no coded_block_pattern is read)
+    uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+    WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+    WELS_READ_VERIFY (ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer));
+  }
+  // clear all 24 stored non-zero counts of this MB; they are refilled below where residual is parsed
+  ST32A4 (&pNzc[0], 0);
+  ST32A4 (&pNzc[4], 0);
+  ST32A4 (&pNzc[8], 0);
+  ST32A4 (&pNzc[12], 0);
+  ST32A4 (&pNzc[16], 0);
+  ST32A4 (&pNzc[20], 0);
+  // intra NxN with CBP == 0 reads no mb_qp_delta: reuse the QP of the previous MB
+  if (pCurLayer->pCbp[iMbXy] == 0 && IS_INTRANxN (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+
+  }
+  // mb_qp_delta and residual are parsed when CBP != 0, and always for intra 16x16
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (pCurLayer->pScaledTCoeff[iMbXy][0]));
+    int32_t iQpDelta, iId8x8, iId4x4;
+
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+    iQpDelta = iCode;
+
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    // new luma QP = previous QP + delta, wrapped into 0..51
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+                                       51)];
+    }
+
+
+    BsStartCavlc (pBs);
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
+                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+        return iRet;//abnormal
+      }
+      //step2: Luma AC
+      if (uiCbpL) {
+        for (i = 0; i < 16; i++) { // AC scan starts no earlier than position 1 (position 0 is the DC parsed above)
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
+                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the 16 luma counts out of the cache, four per store
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+          if (uiCbpL & (1 << iId8x8)) { // this 8x8 block is coded: parsed as four interleaved 4x4 CAVLC blocks
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); // 8x8 block not coded: zero its cached counts with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the 16 luma counts out of the cache
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else {
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                  g_kuiZigzagScan + iScanIdxStart, LUMA_DC_AC_INTRA, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;//abnormal
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2)]], 0); // 8x8 block not coded: zero its cached counts with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the 16 luma counts out of the cache
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpC || 2 == uiCbpC) { // chroma DC is parsed for both uiCbpC values 1 and 2
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
+                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+          return iRet;//abnormal
+        }
+      }
+    }
+
+    //step2: AC
+    if (2 == uiCbpC) { // chroma AC is parsed only for uiCbpC == 2
+      for (i = 0; i < 2; i++) { //Cb Cr
+        iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1])); // copy the 8 chroma counts out of the cache, two per store
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return ERR_NONE;
+}
+
+// Decodes one macroblock of a CAVLC I slice.
+//   pCtx      - decoder context; the MB is parsed from pCtx->pCurDqLayer->pBitStringAux
+//   pNalCur   - not used by this function
+//   uiEosFlag - set to 1 when the bits consumed so far end exactly one bit before
+//               pBs->iBits (the stop bit) and no skip run is pending; otherwise untouched
+// Returns ERR_NONE, the error produced by the MB parser, an UNSUPPORTED_ILP error
+// when base mode is requested, or a BS_INCOMPLETE error when more bits were
+// consumed than the slice holds.
+int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pLayer = pCtx->pCurDqLayer;
+  PBitStringAux pBitReader = pLayer->pBitStringAux;
+  PSliceHeaderExt pHeaderExt = &pLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt;
+
+  // base_mode_flag: read from the stream when adaptive, otherwise the slice default
+  int32_t iBaseMode = pHeaderExt->bDefaultBaseModeFlag;
+  if (pHeaderExt->bAdaptiveBaseModeFlag == 1) {
+    uint32_t uiBit;
+    WELS_READ_VERIFY (BsGetOneBit (pBitReader, &uiBit)); //base_mode_flag
+    iBaseMode = uiBit;
+  }
+
+  if (iBaseMode) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+             iBaseMode);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+  }
+
+  // any parsing error MUST stop decoding of this slice
+  const int32_t kiParseRet = WelsActualDecodeMbCavlcISlice (pCtx);
+  if (kiParseRet) {
+    return kiParseRet;
+  }
+
+  // bits consumed so far, compared against the slice size minus the stop bit
+  const intX_t kiBitsUsed = ((pBitReader->pCurBuf - pBitReader->pStartBuf) << 3) - (16 - pBitReader->iLeftBits);
+  const intX_t kiBitsBeforeStop = pBitReader->iBits - 1;
+
+  if (kiBitsUsed > kiBitsBeforeStop) {
+    // read past the end of the slice: stop decoding to avoid mosaic or crash
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcISlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) kiBitsUsed, pBitReader->iBits);
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
+  }
+  if (kiBitsUsed == kiBitsBeforeStop && 0 >= pLayer->sLayerInfo.sSliceInLayer.iMbSkipRun) {
+    uiEosFlag = 1; // slice boundary
+  }
+  return ERR_NONE;
+}
+
+int32_t WelsActualDecodeMbCavlcPSlice (PWelsDecoderContext pCtx) { // parses one non-skipped MB of a P slice (inter or intra: mb_type, prediction info, CBP, mb_qp_delta, CAVLC residual); returns ERR_NONE or an error code
+  SVlcTable* pVlcTable     = &pCtx->sVlcTable;
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBs              = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+  // coefficient scan range taken from the slice header extension
+  int32_t iScanIdxStart = pSlice->sSliceHeaderExt.uiScanIdxStart;
+  int32_t iScanIdxEnd   = pSlice->sSliceHeaderExt.uiScanIdxEnd;
+
+  SWelsNeighAvail sNeighAvail;
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy]; // this MB's 24 stored non-zero counts
+  int32_t i;
+  int32_t iRet = ERR_NONE;
+  uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
+  uint32_t uiCode;
+  int32_t iCode;
+  int32_t iMbResProperty;
+
+  GetNeighborAvailMbType (&sNeighAvail, pCurLayer);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16); // 48-entry scratch cache of non-zero counts
+  pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //uiMbType
+  uiMbType = uiCode;
+  if (uiMbType < 5) { //inter MB type
+    int16_t iMotionVector[LIST_A][30][MV_A];
+    int8_t  iRefIndex[LIST_A][30];
+    pCurLayer->pMbType[iMbXy] = g_ksInterPMbTypeInfo[uiMbType].iType;
+    WelsFillCacheInter (&sNeighAvail, pNonZeroCount, iMotionVector, iRefIndex, pCurLayer);
+
+    if ((iRet = ParseInterInfo (pCtx, iMotionVector, iRefIndex, pBs)) != ERR_NONE) {
+      return iRet;//abnormal
+    }
+    // residual prediction flag: read from the stream when adaptive, otherwise the slice default
+    if (pSlice->sSliceHeaderExt.bAdaptiveResidualPredFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //residual_prediction_flag
+      pCurLayer->pResidualPredFlag[iMbXy] =  uiCode;
+    } else {
+      pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
+    }
+    // a set residual prediction flag is rejected as unsupported
+    if (pCurLayer->pResidualPredFlag[iMbXy] == 0) {
+      pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "residual_pred_flag = 1 not supported.");
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
+    }
+  } else { //intra MB type
+    uiMbType -= 5; // from here on uiMbType is handled with the same 0..25 numbering as in the I-slice parser
+    if (uiMbType > 25)
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+    if (!pCtx->pSps->uiChromaFormatIdc && ((uiMbType >= 5 && uiMbType <= 12) || (uiMbType >= 17 && uiMbType <= 24))) // types 5..12 and 17..24 are rejected when uiChromaFormatIdc is 0
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_TYPE);
+
+    if (25 == uiMbType) { // I_PCM: the MB's samples are copied straight from the bitstream
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "I_PCM mode exists in P slice!");
+      int32_t iDecStrideL = pCurLayer->pDec->iLinesize[0];
+      int32_t iDecStrideC = pCurLayer->pDec->iLinesize[1];
+      // offsets of this MB's first sample in the luma (16x16 per MB) and chroma (8x8 per MB) planes
+      int32_t iOffsetL = (iMbX + iMbY * iDecStrideL) << 4;
+      int32_t iOffsetC = (iMbX + iMbY * iDecStrideC) << 3;
+
+      uint8_t* pDecY = pCurLayer->pDec->pData[0] + iOffsetL;
+      uint8_t* pDecU = pCurLayer->pDec->pData[1] + iOffsetC;
+      uint8_t* pDecV = pCurLayer->pDec->pData[2] + iOffsetC;
+
+      uint8_t* pTmpBsBuf;
+
+      int32_t i; // shadows the function-level i inside this branch
+      int32_t iCopySizeY  = (sizeof (uint8_t) << 4);
+      int32_t iCopySizeUV = (sizeof (uint8_t) << 3);
+      // bytes to step pCurBuf back, derived from iLeftBits -- NOTE(review): depends on the bit reader's read-ahead layout, confirm against the BsGet*/InitReadBits implementation
+      int32_t iIndex = ((-pBs->iLeftBits) >> 3) + 2;
+
+      pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+      //step 1: locating bit-stream pointer [must align into integer byte]
+      pBs->pCurBuf -= iIndex;
+
+      //step 2: copy pixel from bit-stream into fdec [reconstruction]
+      pTmpBsBuf = pBs->pCurBuf;
+      if (!pCtx->pParam->bParseOnly) { // samples are written to the picture only when not in parse-only mode
+        for (i = 0; i < 16; i++) { //luma
+          memcpy (pDecY, pTmpBsBuf, iCopySizeY);
+          pDecY += iDecStrideL;
+          pTmpBsBuf += 16;
+        }
+
+        for (i = 0; i < 8; i++) { //cb
+          memcpy (pDecU, pTmpBsBuf, iCopySizeUV);
+          pDecU += iDecStrideC;
+          pTmpBsBuf += 8;
+        }
+        for (i = 0; i < 8; i++) { //cr
+          memcpy (pDecV, pTmpBsBuf, iCopySizeUV);
+          pDecV += iDecStrideC;
+          pTmpBsBuf += 8;
+        }
+      }
+      // step over the 384 sample bytes (16*16 luma + 8*8 Cb + 8*8 Cr) whether or not they were copied
+      pBs->pCurBuf += 384;
+
+      //step 3: update QP and pNonZeroCount
+      pCurLayer->pLumaQp[iMbXy] = 0;
+      pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
+      //Rec. 9.2.1 for PCM, nzc=16
+      ST32A4 (&pNzc[0], 0x10101010);
+      ST32A4 (&pNzc[4], 0x10101010);
+      ST32A4 (&pNzc[8], 0x10101010);
+      ST32A4 (&pNzc[12], 0x10101010);
+      ST32A4 (&pNzc[16], 0x10101010);
+      ST32A4 (&pNzc[20], 0x10101010);
+      WELS_READ_VERIFY (InitReadBits (pBs, 0)); // re-initialise bit reading after pCurBuf was moved by hand
+      return ERR_NONE;
+    } else {
+      if (0 == uiMbType) { // intra NxN (4x4, or 8x8 when the transform flag is set)
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
+        if (pCtx->pPps->bTransform8x8ModeFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+          pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+          if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+            uiMbType = pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA8x8;
+          }
+        }
+        if (!pCurLayer->pTransformSize8x8Flag[iMbXy]) { // both branches fill the neighbour cache the same way; only the mode parser differs
+          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra4x4Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+        } else {
+          pCtx->pFillInfoCacheIntraNxNFunc (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
+          WELS_READ_VERIFY (ParseIntra8x8Mode (pCtx, &sNeighAvail, pIntraPredMode, pBs, pCurLayer));
+        }
+      } else { //intra 16x16 (I_PCM was handled above)
+        pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA16x16;
+        pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+        pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+        pCurLayer->pIntraPredMode[iMbXy][7] = (uiMbType - 1) & 3; // the 16x16 prediction mode is taken from mb_type
+        pCurLayer->pCbp[iMbXy] = g_kuiI16CbpTable[ (uiMbType - 1) >> 2]; // and so is the CBP
+        uiCbpC = pCtx->pSps->uiChromaFormatIdc ? pCurLayer->pCbp[iMbXy] >> 4 : 0;
+        uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+        WelsFillCacheNonZeroCount (&sNeighAvail, pNonZeroCount, pCurLayer);
+        if ((iRet = ParseIntra16x16Mode (pCtx, &sNeighAvail, pBs, pCurLayer)) != ERR_NONE) {
+          return iRet;
+        }
+      }
+    }
+  }
+  // intra 16x16 already has its CBP; every other MB type reads coded_block_pattern here
+  if (MB_TYPE_INTRA16x16 != pCurLayer->pMbType[iMbXy]) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //coded_block_pattern
+    uiCbp = uiCode;
+    {
+      if (pCtx->pSps->uiChromaFormatIdc && (uiCbp > 47))
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+      if (!pCtx->pSps->uiChromaFormatIdc && (uiCbp > 15))
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_CBP);
+      if (MB_TYPE_INTRA4x4 == pCurLayer->pMbType[iMbXy] || MB_TYPE_INTRA8x8 == pCurLayer->pMbType[iMbXy]) {
+        // intra NxN and inter MBs map the code number through different tables
+        uiCbp = pCtx->pSps->uiChromaFormatIdc ? g_kuiIntra4x4CbpTable[uiCbp] : g_kuiIntra4x4CbpTable400[uiCbp];
+      } else //inter
+        uiCbp = pCtx->pSps->uiChromaFormatIdc ?  g_kuiInterCbpTable[uiCbp] : g_kuiInterCbpTable400[uiCbp];
+    }
+
+    pCurLayer->pCbp[iMbXy] = uiCbp;
+    uiCbpC = pCurLayer->pCbp[iMbXy] >> 4;
+    uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
+
+    // Needs modification when B pictures are added
+    bool bNeedParseTransformSize8x8Flag =
+      (((pCurLayer->pMbType[iMbXy] >= MB_TYPE_16x16 && pCurLayer->pMbType[iMbXy] <= MB_TYPE_8x16)
+        || pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy])
+       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA8x8)
+       && (pCurLayer->pMbType[iMbXy] != MB_TYPE_INTRA4x4)
+       && (uiCbpL > 0)
+       && (pCtx->pPps->bTransform8x8ModeFlag));
+    // for intra NxN the flag was already read before the prediction modes, hence the exclusions above
+    if (bNeedParseTransformSize8x8Flag) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //transform_size_8x8_flag
+      pCurLayer->pTransformSize8x8Flag[iMbXy] = !!uiCode;
+    }
+  }
+  // clear all 24 stored non-zero counts of this MB; they are refilled below where residual is parsed
+  ST32A4 (&pNzc[0], 0);
+  ST32A4 (&pNzc[4], 0);
+  ST32A4 (&pNzc[8], 0);
+  ST32A4 (&pNzc[12], 0);
+  ST32A4 (&pNzc[16], 0);
+  ST32A4 (&pNzc[20], 0);
+  if (pCurLayer->pCbp[iMbXy] == 0 && !IS_INTRA16x16 (pCurLayer->pMbType[iMbXy]) && !IS_I_BL (pCurLayer->pMbType[iMbXy])) {
+    pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp; // no mb_qp_delta is read for this MB: reuse the QP of the previous MB
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+    }
+  }
+  // mb_qp_delta and residual are parsed when CBP != 0, and always for intra 16x16
+  if (pCurLayer->pCbp[iMbXy] || MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+    int32_t iQpDelta, iId8x8, iId4x4;
+    memset (pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof (int16_t));
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mb_qp_delta
+    iQpDelta = iCode;
+
+    if (iQpDelta > 25 || iQpDelta < -26) { //out of iQpDelta range
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_QP);
+    }
+    // new luma QP = previous QP + delta, wrapped into 0..51
+    pCurLayer->pLumaQp[iMbXy] = (pSlice->iLastMbQp + iQpDelta + 52) % 52; //update last_mb_qp
+    pSlice->iLastMbQp = pCurLayer->pLumaQp[iMbXy];
+    for (i = 0; i < 2; i++) {
+      pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pSlice->iLastMbQp +
+                                       pSliceHeader->pPps->iChromaQpIndexOffset[i], 0,
+                                       51)];
+    }
+
+    BsStartCavlc (pBs);
+
+    if (MB_TYPE_INTRA16x16 == pCurLayer->pMbType[iMbXy]) {
+      //step1: Luma DC
+      if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 0, 16, g_kuiLumaDcZigzagScan, I16_LUMA_DC,
+                                          pCurLayer->pScaledTCoeff[iMbXy], pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+        return iRet;//abnormal
+      }
+      //step2: Luma AC
+      if (uiCbpL) {
+        for (i = 0; i < 16; i++) { // AC scan starts no earlier than position 1 (position 0 is the DC parsed above)
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, i, iScanIdxEnd - WELS_MAX (iScanIdxStart, 1) + 1,
+                                              g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), I16_LUMA_AC, pCurLayer->pScaledTCoeff[iMbXy] + (i << 4),
+                                              pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the 16 luma counts out of the cache, four per store
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    } else { //non-MB_TYPE_INTRA16x16
+      if (pCurLayer->pTransformSize8x8Flag[iMbXy]) {
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA_8 : LUMA_DC_AC_INTER_8;
+          if (uiCbpL & (1 << iId8x8)) { // this 8x8 block is coded: parsed as four interleaved 4x4 CAVLC blocks
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              if ((iRet = WelsResidualBlockCavlc8x8 (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                     g_kuiZigzagScan8x8 + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iId8x8 << 6), iId4x4,
+                                                     pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); // 8x8 block not coded: zero its cached counts with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the 16 luma counts out of the cache
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      } else { // Normal T4x4
+        for (iId8x8 = 0; iId8x8 < 4; iId8x8++) {
+          iMbResProperty = (IS_INTRA (pCurLayer->pMbType[iMbXy])) ? LUMA_DC_AC_INTRA : LUMA_DC_AC_INTER;
+          if (uiCbpL & (1 << iId8x8)) {
+            int32_t iIndex = (iId8x8 << 2);
+            for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+              //Luma (DC and AC decoding together)
+              if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - iScanIdxStart + 1,
+                                                  g_kuiZigzagScan + iScanIdxStart, iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                                  pCurLayer->pLumaQp[iMbXy], pCtx)) != ERR_NONE) {
+                return iRet;//abnormal
+              }
+              iIndex++;
+            }
+          } else {
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[iId8x8 << 2]], 0); // 8x8 block not coded: zero its cached counts with two 16-bit stores
+            ST16 (&pNonZeroCount[g_kuiCache48CountScan4Idx[ (iId8x8 << 2) + 2]], 0);
+          }
+        }
+        ST32A4 (&pNzc[0], LD32 (&pNonZeroCount[1 + 8 * 1])); // copy the 16 luma counts out of the cache
+        ST32A4 (&pNzc[4], LD32 (&pNonZeroCount[1 + 8 * 2]));
+        ST32A4 (&pNzc[8], LD32 (&pNonZeroCount[1 + 8 * 3]));
+        ST32A4 (&pNzc[12], LD32 (&pNonZeroCount[1 + 8 * 4]));
+      }
+    }
+
+
+    //chroma
+    //step1: DC
+    if (1 == uiCbpC || 2 == uiCbpC) { // chroma DC is parsed for both uiCbpC values 1 and 2
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_DC_V : CHROMA_DC_U;
+        else
+          iMbResProperty = i ? CHROMA_DC_V_INTER : CHROMA_DC_U_INTER;
+
+        if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, 16 + (i << 2), 4, g_kuiChromaDcScan, iMbResProperty,
+                                            pCurLayer->pScaledTCoeff[iMbXy] + 256 + (i << 6), pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+          return iRet;//abnormal
+        }
+      }
+    } else { // intentionally empty: nothing is parsed when no chroma DC is coded
+    }
+    //step2: AC
+    if (2 == uiCbpC) { // chroma AC is parsed only for uiCbpC == 2
+      for (i = 0; i < 2; i++) { //Cb Cr
+        if (IS_INTRA (pCurLayer->pMbType[iMbXy]))
+          iMbResProperty = i ? CHROMA_AC_V : CHROMA_AC_U;
+        else
+          iMbResProperty = i ? CHROMA_AC_V_INTER : CHROMA_AC_U_INTER;
+
+        int32_t iIndex = 16 + (i << 2);
+        for (iId4x4 = 0; iId4x4 < 4; iId4x4++) {
+          if ((iRet = WelsResidualBlockCavlc (pVlcTable, pNonZeroCount, pBs, iIndex, iScanIdxEnd - WELS_MAX (iScanIdxStart,
+                                              1) + 1, g_kuiZigzagScan + WELS_MAX (iScanIdxStart, 1), iMbResProperty, pCurLayer->pScaledTCoeff[iMbXy] + (iIndex << 4),
+                                              pCurLayer->pChromaQp[iMbXy][i], pCtx)) != ERR_NONE) {
+            return iRet;//abnormal
+          }
+          iIndex++;
+        }
+      }
+      ST16A2 (&pNzc[16], LD16A2 (&pNonZeroCount[6 + 8 * 1])); // copy the 8 chroma counts out of the cache, two per store
+      ST16A2 (&pNzc[20], LD16A2 (&pNonZeroCount[6 + 8 * 2]));
+      ST16A2 (&pNzc[18], LD16A2 (&pNonZeroCount[6 + 8 * 4]));
+      ST16A2 (&pNzc[22], LD16A2 (&pNonZeroCount[6 + 8 * 5]));
+    }
+    BsEndCavlc (pBs);
+  }
+
+  return ERR_NONE;
+}
+
+// Decodes one macroblock of a CAVLC P slice, handling skip runs itself and
+// delegating coded MBs to WelsActualDecodeMbCavlcPSlice().
+//   pCtx      - decoder context; the MB is taken from pCtx->pCurDqLayer
+//   pNalCur   - current NAL unit; its quality/dependency ids decide whether a
+//               skipped MB inherits the previous QP
+//   uiEosFlag - set to 1 when the bits consumed so far end exactly one bit before
+//               pBs->iBits (the stop bit) and no skip run is pending; otherwise untouched
+// Returns ERR_NONE, the error produced while parsing, an INVALID_MB_SKIP_RUN
+// error, an UNSUPPORTED_ILP error when base mode is requested, or a
+// BS_INCOMPLETE error when more bits were consumed than the slice holds.
+int32_t WelsDecodeMbCavlcPSlice (PWelsDecoderContext pCtx, PNalUnit pNalCur, uint32_t& uiEosFlag) {
+  PDqLayer pCurLayer             = pCtx->pCurDqLayer;
+  PBitStringAux pBs              = pCurLayer->pBitStringAux;
+  PSlice pSlice                  = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader      = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
+  intX_t iUsedBits;
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+  int8_t* pNzc = pCurLayer->pNzc[iMbXy];
+  int32_t iBaseModeFlag, i;
+  int32_t iRet = 0; //should have the return value to indicate decoding error or not, It's NECESSARY--2010.4.15
+  uint32_t uiCode;
+
+  // per-MB defaults, possibly overridden while the MB is parsed
+  pCurLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = true;
+  pCurLayer->pTransformSize8x8Flag[iMbXy] = false;
+
+  // iMbSkipRun == -1 means no run is pending, so a new mb_skip_run must be read
+  if (-1 == pSlice->iMbSkipRun) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //mb_skip_run
+    pSlice->iMbSkipRun = uiCode;
+    if (-1 == pSlice->iMbSkipRun) {
+      return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_MB_SKIP_RUN);
+    }
+  }
+  // the post-decrement takes a run of 0 to -1, so the MB after the run reads a new mb_skip_run
+  if (pSlice->iMbSkipRun--) {
+    int16_t iMv[2];
+
+    // skipped MB: no residual, reference index 0, predicted motion vector
+    pCurLayer->pMbType[iMbXy] = MB_TYPE_SKIP;
+    ST32A4 (&pNzc[0], 0);
+    ST32A4 (&pNzc[4], 0);
+    ST32A4 (&pNzc[8], 0);
+    ST32A4 (&pNzc[12], 0);
+    ST32A4 (&pNzc[16], 0);
+    ST32A4 (&pNzc[20], 0);
+
+    pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
+    memset (pCurLayer->pRefIndex[0][iMbXy], 0, sizeof (int8_t) * 16);
+    // the MB counts as concealed when reference 0 is missing or incomplete
+    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[0] && ppRefPic[0]->bIsComplete);
+    //predict iMv
+    PredPSkipMvFromNeighbor (pCurLayer, iMv);
+    for (i = 0; i < 16; i++) {
+      ST32A2 (pCurLayer->pMv[0][iMbXy][i], * (uint32_t*)iMv);
+    }
+
+    //if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag) {
+    //  memset (pCurLayer->pScaledTCoeff[iMbXy], 0, 384 * sizeof (int16_t));
+    //}
+
+    //reset rS
+    if (!pSlice->sSliceHeaderExt.bDefaultResidualPredFlag ||
+        (pNalCur->sNalHeaderExt.uiQualityId == 0 && pNalCur->sNalHeaderExt.uiDependencyId == 0)) {
+      pCurLayer->pLumaQp[iMbXy] = pSlice->iLastMbQp;
+      for (i = 0; i < 2; i++) {
+        pCurLayer->pChromaQp[iMbXy][i] = g_kuiChromaQpTable[WELS_CLIP3 (pCurLayer->pLumaQp[iMbXy] +
+                                         pSliceHeader->pPps->iChromaQpIndexOffset[i], 0, 51)];
+      }
+    }
+
+    pCurLayer->pCbp[iMbXy] = 0;
+  } else {
+    // coded MB; base_mode_flag is read from the stream when adaptive, otherwise the slice default
+    if (pSlice->sSliceHeaderExt.bAdaptiveBaseModeFlag == 1) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //base_mode_flag
+      iBaseModeFlag = uiCode;
+    } else {
+      iBaseModeFlag = pSlice->sSliceHeaderExt.bDefaultBaseModeFlag;
+    }
+    if (!iBaseModeFlag) {
+      iRet = WelsActualDecodeMbCavlcPSlice (pCtx);
+    } else {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "iBaseModeFlag (%d) != 0, inter-layer prediction not supported.",
+               iBaseModeFlag);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+    }
+    if (iRet) { //occur error when parsing, MUST STOP decoding
+      return iRet;
+    }
+  }
+  // check whether there is left bits to read next time in case multiple slices
+  iUsedBits = ((pBs->pCurBuf - pBs->pStartBuf) << 3) - (16 - pBs->iLeftBits);
+  // sub 1, for stop bit
+  if ((iUsedBits == (pBs->iBits - 1)) && (0 >= pCurLayer->sLayerInfo.sSliceInLayer.iMbSkipRun)) { // slice boundary
+    uiEosFlag = 1;
+  }
+  if (iUsedBits > (pBs->iBits -
+                   1)) { //When BS incomplete, as long as find it, SHOULD stop decoding to avoid mosaic or crash.
+    // the warning names this function (it used to report WelsDecodeMbCavlcISlice)
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "WelsDecodeMbCavlcPSlice()::::pBs incomplete, iUsedBits:%" PRId64 " > pBs->iBits:%d, MUST stop decoding.",
+             (int64_t) iUsedBits, pBs->iBits);
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_BS_INCOMPLETE);
+  }
+  return ERR_NONE;
+}
+
+// Fills the block-function table in pFunc: the plain C routines are installed
+// first, then each SIMD flavour compiled into this build replaces them when
+// iCpu carries the matching capability bit. Later sections override earlier ones.
+void WelsBlockFuncInit (SBlockFunc*   pFunc,  int32_t iCpu) {
+  // portable defaults
+  pFunc->pWelsBlockZero8x8Func      = WelsBlockZero8x8_c;
+  pFunc->pWelsBlockZero16x16Func    = WelsBlockZero16x16_c;
+  pFunc->pWelsSetNonZeroCountFunc   = WelsNonZeroCount_c;
+
+#if defined(HAVE_NEON)
+  if ((iCpu & WELS_CPU_NEON) != 0) {
+    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_neon;
+    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_neon;
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_neon;
+  }
+#endif
+
+#if defined(HAVE_NEON_AARCH64)
+  if ((iCpu & WELS_CPU_NEON) != 0) {
+    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_AArch64_neon;
+    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_AArch64_neon;
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_AArch64_neon;
+  }
+#endif
+
+#if defined(X86_ASM)
+  if ((iCpu & WELS_CPU_SSE2) != 0) {
+    pFunc->pWelsBlockZero8x8Func    = WelsBlockZero8x8_sse2;
+    pFunc->pWelsBlockZero16x16Func  = WelsBlockZero16x16_sse2;
+    pFunc->pWelsSetNonZeroCountFunc = WelsNonZeroCount_sse2;
+  }
+#endif
+
+}
+
+// Fills an iW x iH region of int16_t elements starting at pBlock, with rows
+// iStride elements apart. uiVal is passed to memset, i.e. it is written into
+// every BYTE of the region (so each element becomes uiVal * 0x0101, which is
+// the plain value only for 0).
+void WelsBlockInit (int16_t* pBlock, int iW, int iH, int iStride, uint8_t uiVal) {
+  const size_t kuiRowBytes = iW * sizeof (int16_t);
+  int16_t* pRow = pBlock;
+
+  for (int iRowsLeft = iH; iRowsLeft > 0; --iRowsLeft, pRow += iStride) {
+    memset (pRow, uiVal, kuiRowBytes);
+  }
+}
+// C implementation: zeroes a 16x16 block of int16_t elements whose rows are iStride elements apart.
+void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
+  const int kiBlockEdge = 16;
+  WelsBlockInit (pBlock, kiBlockEdge, kiBlockEdge, iStride, 0);
+}
+
+// C implementation: zeroes an 8x8 block of int16_t elements whose rows are iStride elements apart.
+void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
+  const int kiBlockEdge = 8;
+  WelsBlockInit (pBlock, kiBlockEdge, kiBlockEdge, iStride, 0);
+}
+// Computes the per-reference motion-vector scale factors of the current slice
+// when temporal direct prediction is selected (iDirectSpatialMvPredFlag == 0);
+// does nothing for spatial direct. Results go to pCurSlice->iMvScale[list][i].
+// Always returns true.
+//
+// For each reference index i below uiShortRefCount[LIST_0]:
+//   iTRb = clip(-128..127, POC(current) - POC(pRefList[list][i]))
+//   iTRp = clip(-128..127, POC(pRefList[LIST_1][i]) - POC(pRefList[LIST_0][i]))
+//   scale = clip(-1024..1023, (iTRb * ((16384 + |iTRp / 2|) / iTRp) + 32) >> 6)
+// and 0x03FFF is stored when iTRp is 0.
+bool ComputeColocated (PWelsDecoderContext pCtx) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  PSlice pCurSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pCurSlice->sSliceHeaderExt.sSliceHeader;
+  if (!pSliceHeader->iDirectSpatialMvPredFlag) {
+    const uint32_t uiShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0];
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (uint32_t i = 0; i < uiShortRefCount; ++i) {
+        PPicture pRefCur = pCtx->sRefPic.pRefList[listIdx][i];
+        PPicture pRefL0 = pCtx->sRefPic.pRefList[LIST_0][i];
+        PPicture pRefL1 = pCtx->sRefPic.pRefList[LIST_1][i];
+        // The loop bound is the LIST_0 count for both lists, so an entry may be
+        // missing; leave the scale untouched instead of dereferencing NULL.
+        if (pRefCur == NULL || pRefL0 == NULL || pRefL1 == NULL) {
+          continue;
+        }
+        // WELS_CLIP3 takes (value, min, max); the value must come first.
+        int32_t iTRb = WELS_CLIP3 (pSliceHeader->iPicOrderCntLsb - pRefCur->iFramePoc, -128, 127);
+        int32_t iTRp = WELS_CLIP3 (pRefL1->iFramePoc - pRefL0->iFramePoc, -128, 127);
+        if (iTRp != 0) {
+          int32_t prescale = (16384 + iAbs (iTRp / 2)) / iTRp;
+          pCurSlice->iMvScale[listIdx][i] = WELS_CLIP3 ((iTRb * prescale + 32) >> 6, -1024, 1023);
+        } else {
+          // both references share the same POC: no distance to scale by
+          pCurSlice->iMvScale[listIdx][i] = 0x03FFF;
+        }
+      }
+    }
+  }
+  //TODO: implement the following
+  //get Mv_colocated_L1
+  //and do calculation
+  //iMvp[LIST_0] = Mv_colocated_L1 * (POC(cur) - POC(L0)) / (POC(L1) - POC(L0))
+  //iMvp[LIST_1] = Mv_colocated_L1 * (POC(cur) - POC(L1)) / (POC(L1) - POC(L0))
+  return true;
+}
+} // namespace WelsDec
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -302,12 +302,13 @@
 
   pCtx->pDec                      = NULL;
 
+  pCtx->pTempDec                  = NULL;
+
   WelsResetRefPic (pCtx);
 
   pCtx->iActiveFmoNum             = 0;
 
-  pCtx->pPicBuff[LIST_0]          = NULL;
-  pCtx->pPicBuff[LIST_1]          = NULL;
+  pCtx->pPicBuff          = NULL;
 
   pCtx->bAvcBasedFlag             = true;
   pCtx->pPreviousDecodedPictureInDpb = NULL;
@@ -324,6 +325,8 @@
   pCtx->iSubSPSInvalidNum = 0;
   pCtx->iSubSPSLastInvalidId = -1;
   pCtx->iFeedbackNalRefIdc = -1; //initialize
+  pCtx->iPrevPicOrderCntMsb = 0;
+  pCtx->iPrevPicOrderCntLsb = 0;
 
 }
 
@@ -363,7 +366,6 @@
   const int32_t kiPicHeight     = kiMbHeight << 4;
   int32_t iErr = ERR_NONE;
 
-  int32_t iListIdx              = 0;    //, mb_blocks   = 0;
   int32_t iPicQueueSize         = 0;    // adaptive size of picture queue, = (pSps->iNumRefFrames x 2)
   bReallocFlag                  = false;
   bool  bNeedChangePicQueue     = true;
@@ -375,8 +377,8 @@
   // get picture queue size currently
   iPicQueueSize = GetTargetRefListSize (pCtx);  // adaptive size of picture queue, = (pSps->iNumRefFrames x 2)
   pCtx->iPicQueueNumber = iPicQueueSize;
-  if (pCtx->pPicBuff[LIST_0] != NULL
-      && pCtx->pPicBuff[LIST_0]->iCapacity ==
+  if (pCtx->pPicBuff != NULL
+      && pCtx->pPicBuff->iCapacity ==
       iPicQueueSize) // comparing current picture queue size requested and previous allocation picture queue
     bNeedChangePicQueue = false;
   // HD based pic buffer need consider memory size consumed when switch from 720p to other lower size
@@ -387,16 +389,17 @@
   WelsResetRefPic (pCtx); // added to sync update ref list due to pictures are free
 
   if (pCtx->bHaveGotMemory && (kiPicWidth == pCtx->iImgWidthInPixel && kiPicHeight == pCtx->iImgHeightInPixel)
-      && pCtx->pPicBuff[LIST_0] != NULL && pCtx->pPicBuff[LIST_0]->iCapacity != iPicQueueSize) {
+      && pCtx->pPicBuff != NULL && pCtx->pPicBuff->iCapacity != iPicQueueSize) {
     // currently only active for LIST_0 due to have no B frames
+    // Only one memory allocation is needed for the PicBuff, even though there are two pointer lists (LIST_0 and LIST_1).
     WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
              "WelsRequestMem(): memory re-alloc for no resolution change (size = %d * %d), ref list size change from %d to %d",
-             kiPicWidth, kiPicHeight, pCtx->pPicBuff[LIST_0]->iCapacity, iPicQueueSize);
-    if (pCtx->pPicBuff[LIST_0]->iCapacity < iPicQueueSize) {
-      iErr = IncreasePicBuff (pCtx, &pCtx->pPicBuff[LIST_0], pCtx->pPicBuff[LIST_0]->iCapacity, kiPicWidth, kiPicHeight,
+             kiPicWidth, kiPicHeight, pCtx->pPicBuff->iCapacity, iPicQueueSize);
+    if (pCtx->pPicBuff->iCapacity < iPicQueueSize) {
+      iErr = IncreasePicBuff (pCtx, &pCtx->pPicBuff, pCtx->pPicBuff->iCapacity, kiPicWidth, kiPicHeight,
                               iPicQueueSize);
     } else {
-      iErr = DecreasePicBuff (pCtx, &pCtx->pPicBuff[LIST_0], pCtx->pPicBuff[LIST_0]->iCapacity, kiPicWidth, kiPicHeight,
+      iErr = DecreasePicBuff (pCtx, &pCtx->pPicBuff, pCtx->pPicBuff->iCapacity, kiPicWidth, kiPicHeight,
                               iPicQueueSize);
     }
   } else {
@@ -403,23 +406,22 @@
     if (pCtx->bHaveGotMemory)
       WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
                "WelsRequestMem(): memory re-alloc for resolution change, size change from %d * %d to %d * %d, ref list size change from %d to %d",
-               pCtx->iImgWidthInPixel, pCtx->iImgHeightInPixel, kiPicWidth, kiPicHeight, pCtx->pPicBuff[LIST_0]->iCapacity,
+               pCtx->iImgWidthInPixel, pCtx->iImgHeightInPixel, kiPicWidth, kiPicHeight, pCtx->pPicBuff->iCapacity,
                iPicQueueSize);
     else
       WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "WelsRequestMem(): memory alloc size = %d * %d, ref list size = %d",
                kiPicWidth, kiPicHeight, iPicQueueSize);
     // for Recycled_Pic_Queue
-    for (iListIdx = LIST_0; iListIdx < LIST_A; ++ iListIdx) {
-      PPicBuff* ppPic = &pCtx->pPicBuff[iListIdx];
-      if (NULL != ppPic && NULL != *ppPic) {
-        DestroyPicBuff (ppPic, pMa);
-      }
+    PPicBuff* ppPic = &pCtx->pPicBuff;
+    if (NULL != ppPic && NULL != *ppPic) {
+      DestroyPicBuff (ppPic, pMa);
     }
 
+
     pCtx->pPreviousDecodedPictureInDpb = NULL;
 
     // currently only active for LIST_0 due to have no B frames
-    iErr = CreatePicBuff (pCtx, &pCtx->pPicBuff[LIST_0], iPicQueueSize, kiPicWidth, kiPicHeight);
+    iErr = CreatePicBuff (pCtx, &pCtx->pPicBuff, iPicQueueSize, kiPicWidth, kiPicHeight);
   }
 
   if (iErr != ERR_NONE)
@@ -444,7 +446,7 @@
  *  free memory dynamically allocated during decoder
  */
 void WelsFreeDynamicMemory (PWelsDecoderContext pCtx) {
-  int32_t iListIdx = 0;
+
   CMemoryAlign* pMa = pCtx->pMemAlign;
 
   //free dq layer memory
@@ -455,11 +457,15 @@
 
   //free ref-pic list & picture memory
   WelsResetRefPic (pCtx);
-  for (iListIdx = LIST_0; iListIdx < LIST_A; ++ iListIdx) {
-    PPicBuff* pPicBuff = &pCtx->pPicBuff[iListIdx];
-    if (NULL != pPicBuff && NULL != *pPicBuff) {
-      DestroyPicBuff (pPicBuff, pMa);
-    }
+
+  PPicBuff* pPicBuff = &pCtx->pPicBuff;
+  if (NULL != pPicBuff && NULL != *pPicBuff) {
+    DestroyPicBuff (pPicBuff, pMa);
+  }
+
+  if (pCtx->pTempDec) {
+    FreePicture (pCtx->pTempDec, pCtx->pMemAlign);
+    pCtx->pTempDec = NULL;
   }
 
   // added for safe memory
--- a/codec/decoder/core/src/decoder_core.cpp
+++ b/codec/decoder/core/src/decoder_core.cpp
@@ -1,2639 +1,2777 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *      decoder_core.c: Wels decoder framework core implementation
- */
-
-#include "decoder_core.h"
-#include "error_code.h"
-#include "memmgr_nal_unit.h"
-#include "au_parser.h"
-#include "decode_slice.h"
-#include "manage_dec_ref.h"
-#include "expand_pic.h"
-#include "decoder.h"
-#include "decode_mb_aux.h"
-#include "memory_align.h"
-#include "error_concealment.h"
-
-namespace WelsDec {
-static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  PDqLayer pCurDq = pCtx->pCurDqLayer;
-  PPicture pPic = pCtx->pDec;
-
-  const int32_t kiWidth = pCurDq->iMbWidth << 4;
-  const int32_t kiHeight = pCurDq->iMbHeight << 4;
-
-  const int32_t kiTotalNumMbInCurLayer = pCurDq->iMbWidth * pCurDq->iMbHeight;
-  bool bFrameCompleteFlag = true;
-
-  if (pPic->bNewSeqBegin) {
-    memcpy (& (pCtx->sFrameCrop), & (pCurDq->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.pSps->sFrameCrop),
-            sizeof (SPosOffset)); //confirmed_safe_unsafe_usage
-#ifdef LONG_TERM_REF
-    pCtx->bParamSetsLostFlag      = false;
-#else
-    pCtx->bReferenceLostAtT0Flag = false; // need initialize it due new seq, 6/4/2010
-#endif //LONG_TERM_REF
-    if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) {
-      pCtx->bPrintFrameErrorTraceFlag = true;
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-               "DecodeFrameConstruction(): will output first frame of new sequence, %d x %d, crop_left:%d, crop_right:%d, crop_top:%d, crop_bottom:%d, ignored error packet:%d.",
-               kiWidth, kiHeight, pCtx->sFrameCrop.iLeftOffset, pCtx->sFrameCrop.iRightOffset, pCtx->sFrameCrop.iTopOffset,
-               pCtx->sFrameCrop.iBottomOffset, pCtx->iIgnoredErrorInfoPacketCount);
-      pCtx->iIgnoredErrorInfoPacketCount = 0;
-    }
-  }
-
-  const int32_t kiActualWidth = kiWidth - (pCtx->sFrameCrop.iLeftOffset + pCtx->sFrameCrop.iRightOffset) * 2;
-  const int32_t kiActualHeight = kiHeight - (pCtx->sFrameCrop.iTopOffset + pCtx->sFrameCrop.iBottomOffset) * 2;
-
-
-  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-    if ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
-        || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight)) {
-      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
-      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
-      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
-    }
-    UpdateDecStatNoFreezingInfo (pCtx);
-  }
-
-  if (pCtx->pParam->bParseOnly) { //should exit for parse only to prevent access NULL pDstInfo
-    PAccessUnit pCurAu = pCtx->pAccessUnitList;
-    if (dsErrorFree == pCtx->iErrorCode) { //correct decoding, add to data buffer
-      SParserBsInfo* pParser = pCtx->pParserBsInfo;
-      SNalUnit* pCurNal = NULL;
-      int32_t iTotalNalLen = 0;
-      int32_t iNalLen = 0;
-      int32_t iNum = 0;
-      while (iNum < pParser->iNalNum) {
-        iTotalNalLen += pParser->pNalLenInByte[iNum++];
-      }
-      uint8_t* pDstBuf = pParser->pDstBuff + iTotalNalLen;
-      int32_t iIdx = pCurAu->uiStartPos;
-      int32_t iEndIdx = pCurAu->uiEndPos;
-      uint8_t* pNalBs = NULL;
-      pParser->uiOutBsTimeStamp = (pCurAu->pNalUnitsList [iIdx]) ? pCurAu->pNalUnitsList [iIdx]->uiTimeStamp : 0;
-      //pParser->iNalNum = 0;
-      pParser->iSpsWidthInPixel = (pCtx->pSps->iMbWidth << 4) - ((pCtx->pSps->sFrameCrop.iLeftOffset +
-                                  pCtx->pSps->sFrameCrop.iRightOffset) << 1);
-      pParser->iSpsHeightInPixel = (pCtx->pSps->iMbHeight << 4) - ((pCtx->pSps->sFrameCrop.iTopOffset +
-                                   pCtx->pSps->sFrameCrop.iBottomOffset) << 1);
-
-      if (pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.bIdrFlag) { //IDR
-        if (pCtx->bFrameFinish) { //add required sps/pps
-          if (pParser->iNalNum > pCtx->iMaxNalNum - 2) { //2 reserved for sps+pps
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-                     "DecodeFrameConstruction(): current NAL num (%d) plus sps & pps exceeds permitted num (%d). Will expand",
-                     pParser->iNalNum, pCtx->iMaxNalNum);
-            WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + 2))
-          }
-          bool bSubSps = (NAL_UNIT_CODED_SLICE_EXT == pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.sNalUnitHeader.eNalUnitType);
-          SSpsBsInfo* pSpsBs = NULL;
-          SPpsBsInfo* pPpsBs = NULL;
-          int32_t iSpsId = pCtx->pSps->iSpsId;
-          int32_t iPpsId = pCtx->pPps->iPpsId;
-          pCtx->bParamSetsLostFlag = false;
-          //find required sps, pps and write into dst buff
-          pSpsBs = bSubSps ? &pCtx->sSubsetSpsBsInfo [iSpsId] : &pCtx->sSpsBsInfo [iSpsId];
-          pPpsBs = &pCtx->sPpsBsInfo [iPpsId];
-          if (pDstBuf - pParser->pDstBuff + pSpsBs->uiSpsBsLen + pPpsBs->uiPpsBsLen >= MAX_ACCESS_UNIT_CAPACITY) {
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-                     "DecodeFrameConstruction(): sps pps size: (%d %d) too large. Failed to parse. \n", pSpsBs->uiSpsBsLen,
-                     pPpsBs->uiPpsBsLen);
-            pCtx->iErrorCode |= dsOutOfMemory;
-            pCtx->pParserBsInfo->iNalNum = 0;
-            return ERR_INFO_OUT_OF_MEMORY;
-          }
-          memcpy (pDstBuf, pSpsBs->pSpsBsBuf, pSpsBs->uiSpsBsLen);
-          pParser->pNalLenInByte [pParser->iNalNum ++] = pSpsBs->uiSpsBsLen;
-          pDstBuf += pSpsBs->uiSpsBsLen;
-          memcpy (pDstBuf, pPpsBs->pPpsBsBuf, pPpsBs->uiPpsBsLen);
-          pParser->pNalLenInByte [pParser->iNalNum ++] = pPpsBs->uiPpsBsLen;
-          pDstBuf += pPpsBs->uiPpsBsLen;
-          pCtx->bFrameFinish = false;
-        }
-      }
-      //then VCL data re-write
-      if (pParser->iNalNum + iEndIdx - iIdx + 1 > pCtx->iMaxNalNum) { //calculate total NAL num
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-                 "DecodeFrameConstruction(): current NAL num (%d) exceeds permitted num (%d). Will expand",
-                 pParser->iNalNum + iEndIdx - iIdx + 1, pCtx->iMaxNalNum);
-        WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + iEndIdx - iIdx + 1))
-      }
-      while (iIdx <= iEndIdx) {
-        pCurNal = pCurAu->pNalUnitsList [iIdx ++];
-        iNalLen = pCurNal->sNalData.sVclNal.iNalLength;
-        pNalBs = pCurNal->sNalData.sVclNal.pNalPos;
-        pParser->pNalLenInByte [pParser->iNalNum ++] = iNalLen;
-        if (pDstBuf - pParser->pDstBuff + iNalLen >= MAX_ACCESS_UNIT_CAPACITY) {
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-                   "DecodeFrameConstruction(): composed output size (%ld) exceeds (%d). Failed to parse. current data pos %d out of %d:, previously accumulated num: %d, total num: %d, previously accumulated len: %d, current len: %d, current buf pos: %p, header buf pos: %p \n",
-                   (long) (pDstBuf - pParser->pDstBuff + iNalLen), MAX_ACCESS_UNIT_CAPACITY, iIdx, iEndIdx, iNum, pParser->iNalNum,
-                   iTotalNalLen, iNalLen, pDstBuf, pParser->pDstBuff);
-          pCtx->iErrorCode |= dsOutOfMemory;
-          pCtx->pParserBsInfo->iNalNum = 0;
-          return ERR_INFO_OUT_OF_MEMORY;
-        }
-
-        memcpy (pDstBuf, pNalBs, iNalLen);
-        pDstBuf += iNalLen;
-      }
-      if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) { //frame complete
-        pCtx->iTotalNumMbRec = 0;
-        pCtx->bFramePending = false;
-        pCtx->bFrameFinish = true; //finish current frame and mark it
-      } else if (pCtx->iTotalNumMbRec != 0) { //frame incomplete
-        pCtx->bFramePending = true;
-        pCtx->pDec->bIsComplete = false;
-        pCtx->bFrameFinish = false; //current frame not finished
-        pCtx->iErrorCode |= dsFramePending;
-        return ERR_INFO_PARSEONLY_PENDING;
-        //pCtx->pParserBsInfo->iNalNum = 0;
-      }
-    } else { //error
-      pCtx->pParserBsInfo->uiOutBsTimeStamp = 0;
-      pCtx->pParserBsInfo->iNalNum = 0;
-      pCtx->pParserBsInfo->iSpsWidthInPixel = 0;
-      pCtx->pParserBsInfo->iSpsHeightInPixel = 0;
-      return ERR_INFO_PARSEONLY_ERROR;
-    }
-    return ERR_NONE;
-  }
-
-  if (pCtx->iTotalNumMbRec != kiTotalNumMbInCurLayer) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
-             "DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ",
-             pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight);
-    bFrameCompleteFlag = false; //return later after output buffer is done
-    if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice
-      return ERR_INFO_MB_NUM_INADEQUATE;
-  } else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag
-             && (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done
-    pCtx->pDec->bIsComplete = true;
-    pCtx->bFreezeOutput = false;
-  }
-
-  pCtx->iTotalNumMbRec = 0;
-
-  //////output:::normal path
-  pDstInfo->uiOutYuvTimeStamp = pPic->uiTimeStamp;
-  ppDst[0]      = pPic->pData[0];
-  ppDst[1]      = pPic->pData[1];
-  ppDst[2]      = pPic->pData[2];
-
-  pDstInfo->UsrData.sSystemBuffer.iFormat = videoFormatI420;
-
-  pDstInfo->UsrData.sSystemBuffer.iWidth = kiActualWidth;
-  pDstInfo->UsrData.sSystemBuffer.iHeight = kiActualHeight;
-  pDstInfo->UsrData.sSystemBuffer.iStride[0] = pPic->iLinesize[0];
-  pDstInfo->UsrData.sSystemBuffer.iStride[1] = pPic->iLinesize[1];
-  ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2;
-  ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
-  ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
-  pDstInfo->iBufferStatus = 1;
-
-  bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
-                       || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
-  pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth;
-  pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight;
-  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete
-    pDstInfo->iBufferStatus = (int32_t) (bFrameCompleteFlag
-                                         && pPic->bIsComplete); // When EC disable, ECed picture not output
-  else if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE
-            || pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE)
-           && pCtx->iErrorCode && bOutResChange)
-    pCtx->bFreezeOutput = true;
-
-  if (pDstInfo->iBufferStatus == 0) {
-    if (!bFrameCompleteFlag)
-      pCtx->iErrorCode |= dsBitstreamError;
-    return ERR_INFO_MB_NUM_INADEQUATE;
-  }
-  if (pCtx->bFreezeOutput) {
-    pDstInfo->iBufferStatus = 0;
-    if (pPic->bNewSeqBegin) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
-               "DecodeFrameConstruction():New sequence detected, but freezed, correct MBs (%d) out of whole MBs (%d).",
-               kiTotalNumMbInCurLayer - pCtx->iMbEcedNum, kiTotalNumMbInCurLayer);
-    }
-  }
-  pCtx->iMbEcedNum = pPic->iMbEcedNum;
-  pCtx->iMbNum = pPic->iMbNum;
-  pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum;
-  if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-    if (pDstInfo->iBufferStatus && ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
-                                    || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight))) {
-      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
-      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
-      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
-    }
-    UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0);
-  }
-  return ERR_NONE;
-}
-
-inline bool    CheckSliceNeedReconstruct (uint8_t uiLayerDqId, uint8_t uiTargetDqId) {
-  return (uiLayerDqId == uiTargetDqId); // target layer
-}
-
-inline uint8_t GetTargetDqId (uint8_t uiTargetDqId,  SDecodingParam* psParam) {
-  uint8_t  uiRequiredDqId = psParam ? psParam->uiTargetDqLayer : (uint8_t)255;
-
-  return WELS_MIN (uiTargetDqId, uiRequiredDqId);
-}
-
-
-inline void    HandleReferenceLostL0 (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
-  if (0 == pCurNal->sNalHeaderExt.uiTemporalId) {
-    pCtx->bReferenceLostAtT0Flag = true;
-  }
-  pCtx->iErrorCode |= dsBitstreamError;
-}
-
-inline void    HandleReferenceLost (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
-  if ((0 == pCurNal->sNalHeaderExt.uiTemporalId) || (1 == pCurNal->sNalHeaderExt.uiTemporalId)) {
-    pCtx->bReferenceLostAtT0Flag = true;
-  }
-  pCtx->iErrorCode |= dsRefLost;
-}
-
-inline int32_t  WelsDecodeConstructSlice (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
-  int32_t  iRet = WelsTargetSliceConstruction (pCtx);
-
-  if (iRet) {
-    HandleReferenceLostL0 (pCtx, pCurNal);
-  }
-
-  return iRet;
-}
-
-int32_t ParsePredWeightedTable (PBitStringAux pBs, PSliceHeader pSh) {
-  uint32_t uiCode;
-  int32_t iList = 0;
-  int32_t iCode;
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
-  WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "luma_log2_weight_denom",
-                                  GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM));
-  pSh->sPredWeightTable.uiLumaLog2WeightDenom = uiCode;
-  if (pSh->pSps->uiChromaArrayType != 0) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
-    WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "chroma_log2_weight_denom",
-                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM));
-    pSh->sPredWeightTable.uiChromaLog2WeightDenom = uiCode;
-  }
-
-
-  do {
-
-    for (int i = 0; i < pSh->uiRefCount[iList]; i++) {
-      //luma
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      if (!!uiCode) {
-
-        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_weight",
-                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_WEIGHT));
-        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = iCode;
-
-        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_offset",
-                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_OFFSET));
-        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = iCode;
-      } else {
-        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = 1 << (pSh->sPredWeightTable.uiLumaLog2WeightDenom);
-        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = 0;
-
-      }
-      //chroma
-      if (pSh->pSps->uiChromaArrayType == 0)
-        continue;
-
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode));
-      if (!!uiCode) {
-        for (int j = 0; j < 2; j++) {
-
-
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_weight",
-                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_WEIGHT));
-          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = iCode;
-
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
-          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_offset",
-                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_OFFSET));
-          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = iCode;
-        }
-      } else {
-        for (int j = 0; j < 2; j++) {
-
-
-          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = 1 << (pSh->sPredWeightTable.uiChromaLog2WeightDenom);
-          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = 0;
-        }
-      }
-
-    }
-    ++iList;
-  } while (iList < LIST_1);//TODO: SUPPORT LIST_A
-  return ERR_NONE;
-}
-
-/*
- *  Predeclared function routines ..
- */
-int32_t ParseRefPicListReordering (PBitStringAux pBs, PSliceHeader pSh) {
-  int32_t iList = 0;
-  const EWelsSliceType keSt = pSh->eSliceType;
-  PRefPicListReorderSyn pRefPicListReordering = &pSh->pRefPicListReordering;
-  PSps pSps = pSh->pSps;
-  uint32_t uiCode;
-  if (keSt == I_SLICE || keSt == SI_SLICE)
-    return ERR_NONE;
-
-  // Common syntaxs for P or B slices: list0, list1 followed if B slices used.
-  do {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //ref_pic_list_modification_flag_l0
-    pRefPicListReordering->bRefPicListReorderingFlag[iList] = !!uiCode;
-
-    if (pRefPicListReordering->bRefPicListReorderingFlag[iList]) {
-      int32_t iIdx = 0;
-      do {
-        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //modification_of_pic_nums_idc
-        const uint32_t kuiIdc = uiCode;
-
-        //Fixed the referrence list reordering crash issue.(fault kIdc value > 3 case)---
-        if ((iIdx >= MAX_REF_PIC_COUNT) || (kuiIdc > 3)) {
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
-        }
-        pRefPicListReordering->sReorderingSyn[iList][iIdx].uiReorderingOfPicNumsIdc = kuiIdc;
-        if (kuiIdc == 3)
-          break;
-
-        if (iIdx >= pSh->uiRefCount[iList] || iIdx >= MAX_REF_PIC_COUNT)
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
-
-        if (kuiIdc == 0 || kuiIdc == 1) {
-          // abs_diff_pic_num_minus1 should be in range 0 to MaxPicNum-1, MaxPicNum is derived as
-          // 2^(4+log2_max_frame_num_minus4)
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //abs_diff_pic_num_minus1
-          WELS_CHECK_SE_UPPER_ERROR_NOLOG (uiCode, (uint32_t) (1 << pSps->uiLog2MaxFrameNum), "abs_diff_pic_num_minus1",
-                                           GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING));
-          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiAbsDiffPicNumMinus1 = uiCode; // uiAbsDiffPicNumMinus1
-        } else if (kuiIdc == 2) {
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
-          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiLongTermPicNum = uiCode;
-        }
-
-        ++ iIdx;
-      } while (true);
-    }
-    if (keSt != B_SLICE)
-      break;
-    ++ iList;
-  } while (iList < LIST_A);
-
-  return ERR_NONE;
-}
-
-int32_t ParseDecRefPicMarking (PWelsDecoderContext pCtx, PBitStringAux pBs, PSliceHeader pSh, PSps pSps,
-                               const bool kbIdrFlag) {
-  PRefPicMarking const kpRefMarking = &pSh->sRefMarking;
-  uint32_t uiCode;
-  if (kbIdrFlag) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //no_output_of_prior_pics_flag
-    kpRefMarking->bNoOutputOfPriorPicsFlag = !!uiCode;
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //long_term_reference_flag
-    kpRefMarking->bLongTermRefFlag = !!uiCode;
-  } else {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_ref_pic_marking_mode_flag
-    kpRefMarking->bAdaptiveRefPicMarkingModeFlag = !!uiCode;
-    if (kpRefMarking->bAdaptiveRefPicMarkingModeFlag) {
-      int32_t iIdx = 0;
-      bool bAllowMmco5 = true, bMmco4Exist = false, bMmco5Exist = false, bMmco6Exist = false;
-      do {
-        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //memory_management_control_operation
-        const uint32_t kuiMmco = uiCode;
-
-        kpRefMarking->sMmcoRef[iIdx].uiMmcoType = kuiMmco;
-        if (kuiMmco == MMCO_END)
-          break;
-
-        if (kuiMmco == MMCO_SHORT2UNUSED || kuiMmco == MMCO_SHORT2LONG) {
-          bAllowMmco5 = false;
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //difference_of_pic_nums_minus1
-          kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum = 1 + uiCode;
-          kpRefMarking->sMmcoRef[iIdx].iShortFrameNum = (pSh->iFrameNum - kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum) & ((
-                1 << pSps->uiLog2MaxFrameNum) - 1);
-        } else if (kuiMmco == MMCO_LONG2UNUSED) {
-          bAllowMmco5 = false;
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
-          kpRefMarking->sMmcoRef[iIdx].uiLongTermPicNum = uiCode;
-        }
-        if (kuiMmco == MMCO_SHORT2LONG || kuiMmco == MMCO_LONG) {
-          if (kuiMmco == MMCO_LONG) {
-            WELS_VERIFY_RETURN_IF (-1, bMmco6Exist);
-            bMmco6Exist = true;
-          }
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_frame_idx
-          kpRefMarking->sMmcoRef[iIdx].iLongTermFrameIdx = uiCode;
-        } else if (kuiMmco == MMCO_SET_MAX_LONG) {
-          WELS_VERIFY_RETURN_IF (-1, bMmco4Exist);
-          bMmco4Exist = true;
-          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //max_long_term_frame_idx_plus1
-          kpRefMarking->sMmcoRef[iIdx].iMaxLongTermFrameIdx = -1 + uiCode;
-        } else if (kuiMmco == MMCO_RESET) {
-          WELS_VERIFY_RETURN_IF (-1, (!bAllowMmco5 || bMmco5Exist));
-          bMmco5Exist = true;
-        }
-        ++ iIdx;
-
-      } while (iIdx < MAX_MMCO_COUNT);
-    }
-  }
-
-  return ERR_NONE;
-}
-
-bool FillDefaultSliceHeaderExt (PSliceHeaderExt pShExt, PNalUnitHeaderExt pNalExt) {
-  if (pShExt == NULL || pNalExt == NULL)
-    return false;
-
-  if (pNalExt->iNoInterLayerPredFlag || pNalExt->uiQualityId > 0)
-    pShExt->bBasePredWeightTableFlag = false;
-  else
-    pShExt->bBasePredWeightTableFlag = true;
-  pShExt->uiRefLayerDqId = (uint8_t) - 1;
-  pShExt->uiDisableInterLayerDeblockingFilterIdc        = 0;
-  pShExt->iInterLayerSliceAlphaC0Offset                 = 0;
-  pShExt->iInterLayerSliceBetaOffset                    = 0;
-  pShExt->bConstrainedIntraResamplingFlag               = false;
-  pShExt->uiRefLayerChromaPhaseXPlus1Flag               = 0;
-  pShExt->uiRefLayerChromaPhaseYPlus1                   = 1;
-  //memset(&pShExt->sScaledRefLayer, 0, sizeof(SPosOffset));
-
-  pShExt->iScaledRefLayerPicWidthInSampleLuma   = pShExt->sSliceHeader.iMbWidth << 4;
-  pShExt->iScaledRefLayerPicHeightInSampleLuma  = pShExt->sSliceHeader.iMbHeight << 4;
-
-  pShExt->bSliceSkipFlag                = false;
-  pShExt->bAdaptiveBaseModeFlag         = false;
-  pShExt->bDefaultBaseModeFlag          = false;
-  pShExt->bAdaptiveMotionPredFlag       = false;
-  pShExt->bDefaultMotionPredFlag        = false;
-  pShExt->bAdaptiveResidualPredFlag     = false;
-  pShExt->bDefaultResidualPredFlag      = false;
-  pShExt->bTCoeffLevelPredFlag          = false;
-  pShExt->uiScanIdxStart                = 0;
-  pShExt->uiScanIdxEnd                  = 15;
-
-  return true;
-}
-
-int32_t InitBsBuffer (PWelsDecoderContext pCtx) {
-  if (pCtx == NULL)
-    return ERR_INFO_INVALID_PTR;
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  pCtx->iMaxBsBufferSizeInByte = MIN_ACCESS_UNIT_CAPACITY * MAX_BUFFERED_NUM;
-  if ((pCtx->sRawData.pHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
-                              "pCtx->sRawData.pHead"))) == NULL) {
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-  pCtx->sRawData.pStartPos = pCtx->sRawData.pCurPos = pCtx->sRawData.pHead;
-  pCtx->sRawData.pEnd = pCtx->sRawData.pHead + pCtx->iMaxBsBufferSizeInByte;
-  if (pCtx->pParam->bParseOnly) {
-    pCtx->pParserBsInfo = static_cast<SParserBsInfo*> (pMa->WelsMallocz (sizeof (SParserBsInfo), "pCtx->pParserBsInfo"));
-    if (pCtx->pParserBsInfo == NULL) {
-      return ERR_INFO_OUT_OF_MEMORY;
-    }
-    memset (pCtx->pParserBsInfo, 0, sizeof (SParserBsInfo));
-    pCtx->pParserBsInfo->pDstBuff = static_cast<uint8_t*> (pMa->WelsMallocz (MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t),
-                                    "pCtx->pParserBsInfo->pDstBuff"));
-    if (pCtx->pParserBsInfo->pDstBuff == NULL) {
-      return ERR_INFO_OUT_OF_MEMORY;
-    }
-    memset (pCtx->pParserBsInfo->pDstBuff, 0, MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t));
-
-    if ((pCtx->sSavedData.pHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
-                                  "pCtx->sSavedData.pHead"))) == NULL) {
-      return ERR_INFO_OUT_OF_MEMORY;
-    }
-    pCtx->sSavedData.pStartPos = pCtx->sSavedData.pCurPos = pCtx->sSavedData.pHead;
-    pCtx->sSavedData.pEnd = pCtx->sSavedData.pHead + pCtx->iMaxBsBufferSizeInByte;
-
-    pCtx->iMaxNalNum = MAX_NAL_UNITS_IN_LAYER + 2; //2 reserved for SPS+PPS
-    pCtx->pParserBsInfo->pNalLenInByte = static_cast<int*> (pMa->WelsMallocz (pCtx->iMaxNalNum * sizeof (int),
-                                         "pCtx->pParserBsInfo->pNalLenInByte"));
-    if (pCtx->pParserBsInfo->pNalLenInByte == NULL) {
-      return ERR_INFO_OUT_OF_MEMORY;
-    }
-  }
-  return ERR_NONE;
-}
-
-/*
- * ExpandBsBuffer
- * Replace the raw bitstream buffer (and, when pCtx->pParam->bParseOnly is set, the saved
- * bitstream buffer) with a larger zero-initialised one, copy the old content over and
- * rebase every pointer that referenced the old raw buffer.
- * The new length is the larger of kiSrcLen * MAX_BUFFERED_NUM and twice the current length.
- * All replacement buffers are allocated before the context is modified, so a failed
- * allocation leaves the context exactly as it was (apart from iErrorCode).
- * return:
- *  ERR_NONE on success, ERR_INFO_INVALID_PTR if pCtx is NULL, ERR_INFO_OUT_OF_MEMORY
- *  (with dsOutOfMemory set in pCtx->iErrorCode) if an allocation fails.
- */
-int32_t ExpandBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen) {
-  if (pCtx == NULL)
-    return ERR_INFO_INVALID_PTR;
-  int32_t iExpandStepShift = 1;
-  int32_t iNewBuffLen = WELS_MAX ((kiSrcLen * MAX_BUFFERED_NUM), (pCtx->iMaxBsBufferSizeInByte << iExpandStepShift));
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  //allocate new raw bs buffer
-  uint8_t* pNewBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sRawData.pHead"));
-  if (pNewBsBuff == NULL) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewBsBuff (%d)", iNewBuffLen);
-    pCtx->iErrorCode |= dsOutOfMemory;
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  //allocate new saved bs buffer up front (parse-only mode), so that a failure here
-  //cannot leave sRawData already expanded while iMaxBsBufferSizeInByte still holds the old size
-  uint8_t* pNewSavedBsBuff = NULL;
-  if (pCtx->pParam->bParseOnly) {
-    pNewSavedBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sSavedData.pHead"));
-    if (pNewSavedBsBuff == NULL) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewSavedBsBuff (%d)", iNewBuffLen);
-      pCtx->iErrorCode |= dsOutOfMemory;
-      pMa->WelsFree (pNewBsBuff, "pCtx->sRawData.pHead");
-      return ERR_INFO_OUT_OF_MEMORY;
-    }
-  }
-
-  //Calculate and set the bs start and end position: keep each offset relative to the old head
-  //NOTE(review): the bound is inclusive (<=), so entry [uiActualUnitsNum] is rebased too - confirm this is intended
-  for (uint32_t i = 0; i <= pCtx->pAccessUnitList->uiActualUnitsNum; i++) {
-    PBitStringAux pSliceBitsRead = &pCtx->pAccessUnitList->pNalUnitsList[i]->sNalData.sVclNal.sSliceBitsRead;
-    pSliceBitsRead->pStartBuf = pSliceBitsRead->pStartBuf - pCtx->sRawData.pHead + pNewBsBuff;
-    pSliceBitsRead->pEndBuf   = pSliceBitsRead->pEndBuf   - pCtx->sRawData.pHead + pNewBsBuff;
-    pSliceBitsRead->pCurBuf   = pSliceBitsRead->pCurBuf   - pCtx->sRawData.pHead + pNewBsBuff;
-  }
-
-  //Copy current raw buffer status to new buffer, then release the old one
-  memcpy (pNewBsBuff, pCtx->sRawData.pHead, pCtx->iMaxBsBufferSizeInByte);
-  pCtx->sRawData.pStartPos = pNewBsBuff + (pCtx->sRawData.pStartPos - pCtx->sRawData.pHead);
-  pCtx->sRawData.pCurPos   = pNewBsBuff + (pCtx->sRawData.pCurPos   - pCtx->sRawData.pHead);
-  pCtx->sRawData.pEnd      = pNewBsBuff + iNewBuffLen;
-  pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData.pHead");
-  pCtx->sRawData.pHead = pNewBsBuff;
-
-  if (pNewSavedBsBuff != NULL) {
-    //Copy current saved buffer status to new buffer, then release the old one
-    memcpy (pNewSavedBsBuff, pCtx->sSavedData.pHead, pCtx->iMaxBsBufferSizeInByte);
-    pCtx->sSavedData.pStartPos = pNewSavedBsBuff + (pCtx->sSavedData.pStartPos - pCtx->sSavedData.pHead);
-    pCtx->sSavedData.pCurPos   = pNewSavedBsBuff + (pCtx->sSavedData.pCurPos   - pCtx->sSavedData.pHead);
-    pCtx->sSavedData.pEnd      = pNewSavedBsBuff + iNewBuffLen;
-    pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData.pHead");
-    pCtx->sSavedData.pHead = pNewSavedBsBuff;
-  }
-
-  pCtx->iMaxBsBufferSizeInByte = iNewBuffLen;
-  return ERR_NONE;
-}
-
-/*
- * ExpandBsLenBuffer
- * Grow pCtx->pParserBsInfo->pNalLenInByte (the per-NAL length array) to twice kiCurrLen
- * entries, capped at MAX_MB_SIZE + 2, copying the existing entries into the new array.
- * return:
- *  ERR_NONE on success and pCtx->iMaxNalNum updated to the new entry count;
- *  ERR_INFO_INVALID_PTR if pCtx is NULL;
- *  ERR_INFO_INVALID_ACCESS if there is no parser info or no length array to expand;
- *  ERR_INFO_OUT_OF_MEMORY (dsOutOfMemory set in pCtx->iErrorCode) if kiCurrLen already
- *  reached the cap or the allocation fails.
- */
-int32_t ExpandBsLenBuffer (PWelsDecoderContext pCtx, const int32_t kiCurrLen) {
-  if (pCtx == NULL)
-    return ERR_INFO_INVALID_PTR;
-
-  SParserBsInfo* pParser = pCtx->pParserBsInfo;
-  if (pParser == NULL || !pParser->pNalLenInByte)
-    return ERR_INFO_INVALID_ACCESS;
-
-  int iNewLen = kiCurrLen;
-  if (kiCurrLen >= MAX_MB_SIZE + 2) { //exceeds the max MB number of level 5.2
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Current nal num (%d) exceededs %d.", kiCurrLen, MAX_MB_SIZE);
-    pCtx->iErrorCode |= dsOutOfMemory;
-    return ERR_INFO_OUT_OF_MEMORY;
-  } else {
-    iNewLen = kiCurrLen << 1;
-    iNewLen = WELS_MIN (iNewLen, MAX_MB_SIZE + 2);
-  }
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-  int* pNewLenBuffer = static_cast<int*> (pMa->WelsMallocz (iNewLen * sizeof (int),
-                                          "pCtx->pParserBsInfo->pNalLenInByte"));
-  if (pNewLenBuffer == NULL) {
-    pCtx->iErrorCode |= dsOutOfMemory;
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  //copy existing data from old length buffer to new; never copy more entries than the
-  //new buffer holds, in case kiCurrLen was smaller than the current capacity
-  const int kiCopyNum = WELS_MIN (pCtx->iMaxNalNum, iNewLen);
-  memcpy (pNewLenBuffer, pParser->pNalLenInByte, kiCopyNum * sizeof (int));
-  pMa->WelsFree (pParser->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
-  pParser->pNalLenInByte = pNewLenBuffer;
-  pCtx->iMaxNalNum = iNewLen;
-  return ERR_NONE;
-}
-
-/*
- * CheckBsBuffer
- * Make sure the bitstream buffer can take an input of kiSrcLen bytes.
- * return:
- *  ERR_INFO_INVALID_ACCESS (dsBitstreamError set) if kiSrcLen exceeds MAX_ACCESS_UNIT_CAPACITY;
- *  ERR_INFO_OUT_OF_MEMORY if the buffer had to be expanded and ExpandBsBuffer() failed;
- *  ERR_NONE otherwise.
- */
-int32_t CheckBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen) {
-  // Hard limit: input larger than the max allowed access unit is rejected outright.
-  if (kiSrcLen > MAX_ACCESS_UNIT_CAPACITY) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Max AU size exceeded. Allowed size = %d, current size = %d",
-             MAX_ACCESS_UNIT_CAPACITY,
-             kiSrcLen);
-    pCtx->iErrorCode |= dsBitstreamError;
-    return ERR_INFO_INVALID_ACCESS;
-  }
-
-  // The input fits into one MAX_BUFFERED_NUM-th of the current buffer: nothing to do.
-  if (kiSrcLen <= pCtx->iMaxBsBufferSizeInByte / MAX_BUFFERED_NUM) {
-    return ERR_NONE;
-  }
-
-  // Otherwise writing could overrun the buffer, so expand it first.
-  return ExpandBsBuffer (pCtx, kiSrcLen) ? ERR_INFO_OUT_OF_MEMORY : ERR_NONE;
-}
-
-/*
- * WelsInitStaticMemory
- * Set up the memory the decoder context needs before decoding starts:
- * the access unit NAL list (MAX_NAL_UNIT_NUM_IN_AU entries) and the bitstream buffers.
- * Also resets uiTargetDqId to (uint8_t) -1 and clears bEndOfStreamFlag.
- * return:
- *  ERR_NONE - success;
- *  ERR_INFO_INVALID_PTR - pCtx is NULL;
- *  ERR_INFO_OUT_OF_MEMORY - MemInitNalList() or InitBsBuffer() reported a failure.
-*/
-int32_t WelsInitStaticMemory (PWelsDecoderContext pCtx) {
-  if (NULL == pCtx)
-    return ERR_INFO_INVALID_PTR;
-
-  // InitBsBuffer() is attempted only once the NAL list has been set up.
-  if (MemInitNalList (&pCtx->pAccessUnitList, MAX_NAL_UNIT_NUM_IN_AU, pCtx->pMemAlign) != 0
-      || InitBsBuffer (pCtx) != 0) {
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  pCtx->uiTargetDqId     = static_cast<uint8_t> (-1);
-  pCtx->bEndOfStreamFlag = false;
-  return ERR_NONE;
-}
-
-/*
- * WelsFreeStaticMemory
- * Free memory introduced in WelsInitStaticMemory at destruction of decoder:
- * the access unit NAL list, the raw bitstream buffer, the parse-only buffers
- * (saved bitstream buffer and parser info, released only when pCtx->pParam says
- * bParseOnly) and finally pCtx->pParam itself.
- * Every released pointer is reset to NULL. Does nothing when pCtx is NULL.
- */
-void WelsFreeStaticMemory (PWelsDecoderContext pCtx) {
-  if (pCtx == NULL)
-    return;
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  MemFreeNalList (&pCtx->pAccessUnitList, pMa);
-
-  if (pCtx->sRawData.pHead) {
-    pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData->pHead");
-  }
-  pCtx->sRawData.pHead                = NULL;
-  pCtx->sRawData.pEnd                 = NULL;
-  pCtx->sRawData.pStartPos            = NULL;
-  pCtx->sRawData.pCurPos              = NULL;
-  // pParam is treated as possibly NULL further down, so it must not be dereferenced unchecked here
-  if (NULL != pCtx->pParam && pCtx->pParam->bParseOnly) {
-    if (pCtx->sSavedData.pHead) {
-      pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData->pHead");
-    }
-    pCtx->sSavedData.pHead                = NULL;
-    pCtx->sSavedData.pEnd                 = NULL;
-    pCtx->sSavedData.pStartPos            = NULL;
-    pCtx->sSavedData.pCurPos              = NULL;
-    if (pCtx->pParserBsInfo) {
-      // release the members first, then the parser info structure that owns them
-      if (pCtx->pParserBsInfo->pNalLenInByte) {
-        pMa->WelsFree (pCtx->pParserBsInfo->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
-        pCtx->pParserBsInfo->pNalLenInByte = NULL;
-        pCtx->iMaxNalNum = 0;
-      }
-      if (pCtx->pParserBsInfo->pDstBuff) {
-        pMa->WelsFree (pCtx->pParserBsInfo->pDstBuff, "pCtx->pParserBsInfo->pDstBuff");
-        pCtx->pParserBsInfo->pDstBuff = NULL;
-      }
-      pMa->WelsFree (pCtx->pParserBsInfo, "pCtx->pParserBsInfo");
-      pCtx->pParserBsInfo = NULL;
-    }
-  }
-
-  if (NULL != pCtx->pParam) {
-    pMa->WelsFree (pCtx->pParam, "pCtx->pParam");
-
-    pCtx->pParam = NULL;
-  }
-}
-/*
- *  DecodeNalHeaderExt
- *  Unpack the three extension bytes of a NAL unit header into pNal->sNalHeaderExt.
- *  Trigger condition: NAL_UNIT_TYPE = NAL_UNIT_PREFIX or NAL_UNIT_CODED_SLICE_EXT
- *  Parameter:
- *  pNal:   target NALUnit ptr, its sNalHeaderExt fields are overwritten
- *  pSrc:   NAL Unit bitstream, bytes pSrc[0] .. pSrc[2] are read
- */
-void DecodeNalHeaderExt (PNalUnit pNal, uint8_t* pSrc) {
-  PNalUnitHeaderExt pExt = &pNal->sNalHeaderExt;
-
-  // byte 0: bit 6 = idr flag, low 6 bits = priority id
-  const uint8_t kuiFirst = pSrc[0];
-  pExt->bIdrFlag     = !! (kuiFirst & 0x40);
-  pExt->uiPriorityId = kuiFirst & 0x3F;
-
-  // byte 1: bit 7 = no inter layer pred flag, bits 6..4 = dependency id, low 4 bits = quality id
-  const uint8_t kuiSecond = pSrc[1];
-  pExt->iNoInterLayerPredFlag = kuiSecond >> 7;
-  pExt->uiDependencyId        = (kuiSecond & 0x70) >> 4;
-  pExt->uiQualityId           = kuiSecond & 0x0F;
-
-  // byte 2: bits 7..5 = temporal id, then one bit each for use-ref-base-pic, discardable
-  // and output flag, low 2 bits = reserved
-  const uint8_t kuiThird = pSrc[2];
-  pExt->uiTemporalId         = kuiThird >> 5;
-  pExt->bUseRefBasePicFlag   = !! (kuiThird & 0x10);
-  pExt->bDiscardableFlag     = !! (kuiThird & 0x08);
-  pExt->bOutputFlag          = !! (kuiThird & 0x04);
-  pExt->uiReservedThree2Bits = kuiThird & 0x03;
-
-  // combined layer id: dependency id in the upper nibble, quality id in the lower one
-  pExt->uiLayerDqId = (pExt->uiDependencyId << 4) | pExt->uiQualityId;
-}
-
-
-/*
- *  UpdateDecoderStatisticsForActiveParaset
- *  Record the ids of the given SPS/PPS and the profile/level of the SPS in the decoder statistics.
- */
-void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatistics,
-    PSps pSps, PPps pPps) {
-  // parameter set ids
-  pDecoderStatistics->iCurrentActivePpsId = pPps->iPpsId;
-  pDecoderStatistics->iCurrentActiveSpsId = pSps->iSpsId;
-
-  // profile and level are taken from the SPS
-  pDecoderStatistics->uiLevel   = pSps->uiLevelIdc;
-  pDecoderStatistics->uiProfile = static_cast<unsigned int> (pSps->uiProfileIdc);
-}
-
-#define SLICE_HEADER_IDR_PIC_ID_MAX 65535 // largest idr_pic_id accepted by ParseSliceHeaderSyntaxs
-#define SLICE_HEADER_REDUNDANT_PIC_CNT_MAX 127 // largest redundant_pic_cnt accepted
-#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN -12 // lower bound for slice_alpha_c0_offset_div2 * 2 and slice_beta_offset_div2 * 2
-#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX 12 // upper bound for the same two doubled offsets
-#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN -12 // lower bound for the doubled inter layer alpha c0 / beta offsets
-#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX 12 // upper bound for the doubled inter layer alpha c0 / beta offsets
-#define MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1 15 // largest num_ref_idx_l0_active_minus1 accepted
-#define SLICE_HEADER_CABAC_INIT_IDC_MAX 2 // largest cabac_init_idc accepted
-/*
- *  ParseSliceHeaderSyntaxs
- *  Parse the slice header of the last available NAL unit of the access unit list
- *  (pNalUnitsList[uiAvailUnitsNum - 1]) from pBs and store the syntax elements in that
- *  NAL's slice header / slice header extension.
- *  Parameter:
- *  pCtx:             decoder context; supplies the access unit list, the PPS / SPS / subset SPS
- *                    buffers with their availability flags, the statistics and the log context
- *  pBs:              bit string to read from; the first element read is first_mb_in_slice
- *  kbExtensionFlag:  true to resolve the SPS through the subset SPS buffer and to parse the
- *                    additional slice header extension syntax elements
- *  return:
- *  ERR_NONE on success, otherwise an error number; most are built with
- *  GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ...). dsNoParamSets is set in pCtx->iErrorCode
- *  when the referenced PPS / SPS / subset SPS is not available.
- *  NOTE(review): WELS_READ_VERIFY presumably returns from this function when the read fails -
- *  confirm against the macro definition.
- */
-int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) {
-  PNalUnit const kpCurNal               = pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum -
-                                          1];
-
-  PNalUnitHeaderExt pNalHeaderExt       = NULL;
-  PSliceHeader pSliceHead               = NULL;
-  PSliceHeaderExt pSliceHeadExt         = NULL;
-  PSubsetSps pSubsetSps                 = NULL;
-  PSps pSps                             = NULL;
-  PPps pPps                             = NULL;
-  EWelsNalUnitType eNalType             = static_cast<EWelsNalUnitType> (0);
-  int32_t iPpsId                        = 0;
-  int32_t iRet                          = ERR_NONE;
-  uint8_t uiSliceType                   = 0;
-  // NOTE(review): uiQualityId is never reassigned in this function, so every comparison
-  // against BASE_QUALITY_ID below sees the base value - confirm that is intended
-  uint8_t uiQualityId                   = BASE_QUALITY_ID;
-  bool  bIdrFlag                        = false;
-  bool  bSgChangeCycleInvolved          = false;        // involved slice group change cycle ?
-  uint32_t uiCode;
-  int32_t iCode;
-  SLogContext* pLogCtx = & (pCtx->sLogCtx);
-
-  if (kpCurNal == NULL) {
-    return ERR_INFO_OUT_OF_MEMORY;
-  }
-
-  pNalHeaderExt = &kpCurNal->sNalHeaderExt;
-  pSliceHead    = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
-  eNalType      = pNalHeaderExt->sNalUnitHeader.eNalUnitType;
-
-  pSliceHeadExt = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt;
-
-  // reset the whole slice header extension, but keep bStoreRefBasePicFlag and sRefBasePicMarking
-  if (pSliceHeadExt) {
-    SRefBasePicMarking sBaseMarking;
-    const bool kbStoreRefBaseFlag = pSliceHeadExt->bStoreRefBasePicFlag;
-    memcpy (&sBaseMarking, &pSliceHeadExt->sRefBasePicMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
-    memset (pSliceHeadExt, 0, sizeof (SSliceHeaderExt));
-    pSliceHeadExt->bStoreRefBasePicFlag = kbStoreRefBaseFlag;
-    memcpy (&pSliceHeadExt->sRefBasePicMarking, &sBaseMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
-  }
-
-  kpCurNal->sNalData.sVclNal.bSliceHeaderExtFlag = kbExtensionFlag;
-
-  // first_mb_in_slice
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //first_mb_in_slice
-  WELS_CHECK_SE_UPPER_ERROR (uiCode, 36863u, "first_mb_in_slice", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                             ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
-  pSliceHead->iFirstMbInSlice = uiCode;
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //slice_type
-  // range-check the full 32-bit code before narrowing it to 8 bits, otherwise a value
-  // such as 256 would wrap to 0 and be accepted as a valid slice type
-  if (uiCode > 9) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "slice type too large (%d) at first_mb(%d)", uiCode,
-             pSliceHead->iFirstMbInSlice);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-  }
-  uiSliceType = static_cast<uint8_t> (uiCode);
-  // values 5..9 are folded onto 0..4
-  if (uiSliceType > 4)
-    uiSliceType -= 5;
-
-  if (B_SLICE == uiSliceType) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseSliceHeaderSyntaxs(): B slice not supported.");
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_BIPRED);
-  }
-  if ((NAL_UNIT_CODED_SLICE_IDR == eNalType) && (I_SLICE != uiSliceType)) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d) in IDR picture. ", uiSliceType);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-  }
-
-  if (kbExtensionFlag) {
-    if (uiSliceType > 2) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d).", uiSliceType);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-    }
-  }
-
-  pSliceHead->eSliceType = static_cast <EWelsSliceType> (uiSliceType);
-
-  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_parameter_set_id
-  WELS_CHECK_SE_UPPER_ERROR (uiCode, (MAX_PPS_COUNT - 1), "iPpsId out of range",
-                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                 ERR_INFO_PPS_ID_OVERFLOW));
-  iPpsId = uiCode;
-
-  //add check PPS available here
-  //repeated errors for the same PPS id are only counted, the log is written when the id changes
-  if (pCtx->bPpsAvailFlags[iPpsId] == false) {
-    pCtx->sDecoderStatistics.iPpsReportErrorNum++;
-    if (pCtx->iPPSLastInvalidId != iPpsId) {
-      WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId,
-               pCtx->iPPSLastInvalidId, pCtx->iPPSInvalidNum);
-      pCtx->iPPSLastInvalidId = iPpsId;
-      pCtx->iPPSInvalidNum = 0;
-    } else {
-      pCtx->iPPSInvalidNum++;
-    }
-    pCtx->iErrorCode |= dsNoParamSets;
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID);
-  }
-  pCtx->iPPSLastInvalidId = -1;
-
-  pPps    = &pCtx->sPpsBuffer[iPpsId];
-
-  if (pPps->uiNumSliceGroups == 0) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced");
-    pCtx->iErrorCode |= dsNoParamSets;
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
-  }
-
-  // resolve the SPS: through the subset SPS buffer for extension slices, the plain SPS buffer otherwise
-  if (kbExtensionFlag) {
-    pSubsetSps      = &pCtx->sSubsetSpsBuffer[pPps->iSpsId];
-    pSps            = &pSubsetSps->sSps;
-    if (pCtx->bSubspsAvailFlags[pPps->iSpsId] == false) {
-      pCtx->sDecoderStatistics.iSubSpsReportErrorNum++;
-      if (pCtx->iSubSPSLastInvalidId != pPps->iSpsId) {
-        WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
-                 pCtx->iSubSPSLastInvalidId, pCtx->iSubSPSInvalidNum);
-        pCtx->iSubSPSLastInvalidId = pPps->iSpsId;
-        pCtx->iSubSPSInvalidNum = 0;
-      } else {
-        pCtx->iSubSPSInvalidNum++;
-      }
-      pCtx->iErrorCode |= dsNoParamSets;
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
-    }
-    pCtx->iSubSPSLastInvalidId = -1;
-  } else {
-    if (pCtx->bSpsAvailFlags[pPps->iSpsId] == false) {
-      pCtx->sDecoderStatistics.iSpsReportErrorNum++;
-      if (pCtx->iSPSLastInvalidId != pPps->iSpsId) {
-        WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
-                 pCtx->iSPSLastInvalidId, pCtx->iSPSInvalidNum);
-        pCtx->iSPSLastInvalidId = pPps->iSpsId;
-        pCtx->iSPSInvalidNum = 0;
-      } else {
-        pCtx->iSPSInvalidNum++;
-      }
-      pCtx->iErrorCode |= dsNoParamSets;
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
-    }
-    pCtx->iSPSLastInvalidId = -1;
-    pSps = &pCtx->sSpsBuffer[pPps->iSpsId];
-  }
-  pSliceHead->iPpsId = iPpsId;
-  pSliceHead->iSpsId = pPps->iSpsId;
-  pSliceHead->pPps   = pPps;
-  pSliceHead->pSps   = pSps;
-
-  pSliceHeadExt->pSubsetSps = pSubsetSps;
-
-  // without reference frames only I / SI slices are accepted
-  if (pSps->iNumRefFrames == 0) {
-    if ((uiSliceType != I_SLICE) && (uiSliceType != SI_SLICE)) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "slice_type (%d) not supported for num_ref_frames = 0.", uiSliceType);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
-    }
-  }
-
-  // IDR is signalled by the NAL type for plain slices and by the header extension flag for extension slices
-  bIdrFlag = (!kbExtensionFlag && eNalType == NAL_UNIT_CODED_SLICE_IDR) || (kbExtensionFlag && pNalHeaderExt->bIdrFlag);
-  pSliceHead->bIdrFlag = bIdrFlag;
-
-  if (pSps->uiLog2MaxFrameNum == 0) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "non existing SPS referenced");
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
-  }
-  // check first_mb_in_slice against the MB count of the resolved SPS
-  WELS_CHECK_SE_UPPER_ERROR ((uint32_t) (pSliceHead->iFirstMbInSlice), (pSps->uiTotalMbCount - 1), "first_mb_in_slice",
-                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
-  WELS_READ_VERIFY (BsGetBits (pBs, pSps->uiLog2MaxFrameNum, &uiCode)); //frame_num
-  pSliceHead->iFrameNum = uiCode;
-
-  pSliceHead->bFieldPicFlag    = false;
-  pSliceHead->bBottomFiledFlag = false;
-  if (!pSps->bFrameMbsOnlyFlag) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseSliceHeaderSyntaxs(): frame_mbs_only_flag = %d not supported. ",
-             pSps->bFrameMbsOnlyFlag);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MBAFF);
-  }
-  pSliceHead->iMbWidth  = pSps->iMbWidth;
-  pSliceHead->iMbHeight = pSps->iMbHeight / (1 + pSliceHead->bFieldPicFlag);
-
-  if (bIdrFlag) {
-    if (pSliceHead->iFrameNum != 0) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING,
-               "ParseSliceHeaderSyntaxs(), invaild frame number: %d due to IDR frame introduced!",
-               pSliceHead->iFrameNum);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FRAME_NUM);
-    }
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //idr_pic_id
-    // standard 7.4.3 idr_pic_id should be in range 0 to 65535, inclusive.
-    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_IDR_PIC_ID_MAX, "idr_pic_id", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                               ERR_INFO_INVALID_IDR_PIC_ID));
-    pSliceHead->uiIdrPicId = uiCode; /* uiIdrPicId */
-#ifdef LONG_TERM_REF
-    pCtx->uiCurIdrPicId = pSliceHead->uiIdrPicId;
-#endif
-  }
-
-  // picture order count syntax, depending on the POC type of the SPS
-  pSliceHead->iDeltaPicOrderCntBottom = 0;
-  pSliceHead->iDeltaPicOrderCnt[0] =
-    pSliceHead->iDeltaPicOrderCnt[1] = 0;
-  if (pSps->uiPocType == 0) {
-    WELS_READ_VERIFY (BsGetBits (pBs, pSps->iLog2MaxPocLsb, &uiCode)); //pic_order_cnt_lsb
-    pSliceHead->iPicOrderCntLsb = uiCode;
-    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt_bottom
-      pSliceHead->iDeltaPicOrderCntBottom = iCode;
-    }
-  } else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) {
-    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 0 ]
-    pSliceHead->iDeltaPicOrderCnt[0] = iCode;
-    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 1 ]
-      pSliceHead->iDeltaPicOrderCnt[1] = iCode;
-    }
-  }
-
-  pSliceHead->iRedundantPicCnt = 0;
-  if (pPps->bRedundantPicCntPresentFlag) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //redundant_pic_cnt
-    // standard section 7.4.3, redundant_pic_cnt should be in range 0 to 127, inclusive.
-    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_REDUNDANT_PIC_CNT_MAX, "redundant_pic_cnt",
-                               GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT));
-    pSliceHead->iRedundantPicCnt = uiCode;
-    if (pSliceHead->iRedundantPicCnt > 0) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "Redundant picture not supported!");
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT);
-    }
-  }
-
-  //set defaults, might be overriden a few line later
-  pSliceHead->uiRefCount[0] = pPps->uiNumRefIdxL0Active;
-  pSliceHead->uiRefCount[1] = pPps->uiNumRefIdxL1Active;
-
-  // the override flag is read for P slices only (and, for extension slices, only at base quality)
-  bool bReadNumRefFlag = (P_SLICE == uiSliceType);
-  if (kbExtensionFlag) {
-    bReadNumRefFlag &= (BASE_QUALITY_ID == pNalHeaderExt->uiQualityId);
-  }
-  if (bReadNumRefFlag) {
-    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //num_ref_idx_active_override_flag
-    pSliceHead->bNumRefIdxActiveOverrideFlag = !!uiCode;
-    if (pSliceHead->bNumRefIdxActiveOverrideFlag) {
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l0_active_minus1
-      WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1, "num_ref_idx_l0_active_minus1",
-                                 GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1));
-      pSliceHead->uiRefCount[0] = 1 + uiCode;
-    }
-  }
-
-  if (pSliceHead->uiRefCount[0] > MAX_REF_PIC_COUNT || pSliceHead->uiRefCount[1] > MAX_REF_PIC_COUNT) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "reference overflow");
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REF_COUNT_OVERFLOW);
-  }
-
-  if (BASE_QUALITY_ID == uiQualityId) {
-    iRet = ParseRefPicListReordering (pBs, pSliceHead);
-    if (iRet != ERR_NONE) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid ref pPic list reordering syntaxs!");
-      return iRet;
-    }
-
-    if (pPps->bWeightedPredFlag && (uiSliceType == P_SLICE)) {
-      iRet = ParsePredWeightedTable (pBs, pSliceHead);
-      if (iRet != ERR_NONE) {
-        WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid weighted prediction syntaxs!");
-        return iRet;
-      }
-    }
-
-    if (kbExtensionFlag) {
-      if (pNalHeaderExt->iNoInterLayerPredFlag || pNalHeaderExt->uiQualityId > 0)
-        pSliceHeadExt->bBasePredWeightTableFlag = false;
-      else
-        pSliceHeadExt->bBasePredWeightTableFlag = true;
-    }
-
-    // reference picture marking is only present when nal_ref_idc is non-zero
-    if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
-      iRet = ParseDecRefPicMarking (pCtx, pBs, pSliceHead, pSps, bIdrFlag);
-      if (iRet != ERR_NONE) {
-        return iRet;
-      }
-
-      if (kbExtensionFlag && !pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //store_ref_base_pic_flag
-        pSliceHeadExt->bStoreRefBasePicFlag = !!uiCode;
-        if ((pNalHeaderExt->bUseRefBasePicFlag || pSliceHeadExt->bStoreRefBasePicFlag) && !bIdrFlag) {
-          WelsLog (pLogCtx, WELS_LOG_WARNING,
-                   "ParseSliceHeaderSyntaxs(): bUseRefBasePicFlag or bStoreRefBasePicFlag = 1 not supported.");
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
-        }
-      }
-    }
-  }
-
-  if (pPps->bEntropyCodingModeFlag) {
-    if (pSliceHead->eSliceType != I_SLICE && pSliceHead->eSliceType != SI_SLICE) {
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //cabac_init_idc
-      // NOTE(review): unlike the other checks, the error value here is not wrapped in
-      // GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ...) - left as is to keep the returned value
-      WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_CABAC_INIT_IDC_MAX, "cabac_init_idc", ERR_INFO_INVALID_CABAC_INIT_IDC);
-      pSliceHead->iCabacInitIdc = uiCode;
-    } else
-      pSliceHead->iCabacInitIdc = 0;
-  }
-
-  WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_qp_delta
-  pSliceHead->iSliceQpDelta     = iCode;
-  pSliceHead->iSliceQp          = pPps->iPicInitQp + pSliceHead->iSliceQpDelta;
-  if (pSliceHead->iSliceQp < 0 || pSliceHead->iSliceQp > 51) {
-    WelsLog (pLogCtx, WELS_LOG_WARNING, "QP %d out of range", pSliceHead->iSliceQp);
-    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_QP);
-  }
-
-  //FIXME qscale / qp ... stuff
-  if (!kbExtensionFlag) {
-    if (uiSliceType == SP_SLICE || uiSliceType == SI_SLICE) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "SP/SI not supported");
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SPSI);
-    }
-  }
-
-  // deblocking filter control; the defaults stay in place when the PPS does not signal it
-  pSliceHead->uiDisableDeblockingFilterIdc = 0;
-  pSliceHead->iSliceAlphaC0Offset          = 0;
-  pSliceHead->iSliceBetaOffset             = 0;
-  if (pPps->bDeblockingFilterControlPresentFlag) {
-    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_deblocking_filter_idc
-    pSliceHead->uiDisableDeblockingFilterIdc = uiCode;
-    //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
-    if (pSliceHead->uiDisableDeblockingFilterIdc > 6) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "disable_deblock_filter_idc (%d) out of range [0, 6]",
-               pSliceHead->uiDisableDeblockingFilterIdc);
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
-    }
-    if (pSliceHead->uiDisableDeblockingFilterIdc != 1) {
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_alpha_c0_offset_div2
-      pSliceHead->iSliceAlphaC0Offset = iCode * 2;
-      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceAlphaC0Offset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
-                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                    ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
-      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_beta_offset_div2
-      pSliceHead->iSliceBetaOffset = iCode * 2;
-      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceBetaOffset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
-                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_beta_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                    ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
-    }
-  }
-
-  // slice_group_change_cycle is present for slice group map types 3..5 with more than one slice group
-  bSgChangeCycleInvolved = (pPps->uiNumSliceGroups > 1 && pPps->uiSliceGroupMapType >= 3
-                            && pPps->uiSliceGroupMapType <= 5);
-  if (kbExtensionFlag && bSgChangeCycleInvolved)
-    bSgChangeCycleInvolved = (bSgChangeCycleInvolved && (uiQualityId == BASE_QUALITY_ID));
-  if (bSgChangeCycleInvolved) {
-    if (pPps->uiSliceGroupChangeRate > 0) {
-      // standard section 7.4.3: slice_group_change_cycle is coded with
-      // Ceil (Log2 (PicSizeInMapUnits / SliceGroupChangeRate + 1)) bits, the division being exact.
-      // That equals the bit length of Ceil (PicSizeInMapUnits / SliceGroupChangeRate), computed
-      // here with integers only (a natural logarithm would yield too few bits).
-      const uint32_t kuiPicSize    = static_cast<uint32_t> (pPps->uiPicSizeInMapUnits);
-      const uint32_t kuiChangeRate = static_cast<uint32_t> (pPps->uiSliceGroupChangeRate);
-      const uint32_t kuiMaxCycle   = kuiPicSize / kuiChangeRate + ((kuiPicSize % kuiChangeRate) ? 1 : 0);
-      int32_t iNumBits = 0;
-      while (iNumBits < 32 && (kuiMaxCycle >> iNumBits) != 0)
-        ++iNumBits;
-      const int32_t kiNumBits = iNumBits;
-      WELS_READ_VERIFY (BsGetBits (pBs, kiNumBits, &uiCode)); //slice_group_change_cycle
-      pSliceHead->iSliceGroupChangeCycle = uiCode;
-    } else
-      pSliceHead->iSliceGroupChangeCycle = 0;
-  }
-
-  if (!kbExtensionFlag) {
-    FillDefaultSliceHeaderExt (pSliceHeadExt, pNalHeaderExt);
-  } else {
-    /* Extra syntax elements newly introduced */
-    pSliceHeadExt->pSubsetSps = pSubsetSps;
-
-    if (!pNalHeaderExt->iNoInterLayerPredFlag && BASE_QUALITY_ID == uiQualityId) {
-      //the following should be deleted for CODE_CLEAN
-      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //ref_layer_dq_id
-      pSliceHeadExt->uiRefLayerDqId = uiCode;
-      if (pSubsetSps->sSpsSvcExt.bInterLayerDeblockingFilterCtrlPresentFlag) {
-        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_inter_layer_deblocking_filter_idc
-        pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc = uiCode;
-        //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
-        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc > 6) {
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "disable_inter_layer_deblock_filter_idc (%d) out of range [0, 6]",
-                   pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc);
-          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
-        }
-        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc != 1) {
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_alpha_c0_offset_div2
-          pSliceHeadExt->iInterLayerSliceAlphaC0Offset = iCode * 2;
-          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceAlphaC0Offset,
-                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX,
-                                    "inter_layer_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
-                                        ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
-          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_beta_offset_div2
-          pSliceHeadExt->iInterLayerSliceBetaOffset = iCode * 2;
-          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceBetaOffset, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN,
-                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX, "inter_layer_slice_beta_offset_div2 * 2",
-                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
-        }
-      }
-
-      // chroma phase values are taken over from the subset SPS
-      pSliceHeadExt->uiRefLayerChromaPhaseXPlus1Flag = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseXPlus1Flag;
-      pSliceHeadExt->uiRefLayerChromaPhaseYPlus1     = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseYPlus1;
-
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constrained_intra_resampling_flag
-      pSliceHeadExt->bConstrainedIntraResamplingFlag = !!uiCode;
-
-      {
-        // scaled reference layer size: picture size in luma samples minus the sequence level offsets
-        SPosOffset pos;
-        pos.iLeftOffset   = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iLeftOffset;
-        pos.iTopOffset    = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iTopOffset * (2 - pSps->bFrameMbsOnlyFlag);
-        pos.iRightOffset  = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iRightOffset;
-        pos.iBottomOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iBottomOffset * (2 - pSps->bFrameMbsOnlyFlag);
-        //memcpy(&pSliceHeadExt->sScaledRefLayer, &pos, sizeof(SPosOffset));//confirmed_safe_unsafe_usage
-        pSliceHeadExt->iScaledRefLayerPicWidthInSampleLuma  = (pSliceHead->iMbWidth << 4) -
-            (pos.iLeftOffset + pos.iRightOffset);
-        pSliceHeadExt->iScaledRefLayerPicHeightInSampleLuma = (pSliceHead->iMbHeight << 4) -
-            (pos.iTopOffset + pos.iBottomOffset) / (1 + pSliceHead->bFieldPicFlag);
-      }
-    } else if (uiQualityId > BASE_QUALITY_ID) {
-      WelsLog (pLogCtx, WELS_LOG_WARNING, "MGS not supported.");
-      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
-    } else {
-      pSliceHeadExt->uiRefLayerDqId = (uint8_t) - 1;
-    }
-
-    // defaults for the inter layer prediction flags, overwritten below when they are present
-    pSliceHeadExt->bSliceSkipFlag            = false;
-    pSliceHeadExt->bAdaptiveBaseModeFlag     = false;
-    pSliceHeadExt->bDefaultBaseModeFlag      = false;
-    pSliceHeadExt->bAdaptiveMotionPredFlag   = false;
-    pSliceHeadExt->bDefaultMotionPredFlag    = false;
-    pSliceHeadExt->bAdaptiveResidualPredFlag = false;
-    pSliceHeadExt->bDefaultResidualPredFlag  = false;
-    if (pNalHeaderExt->iNoInterLayerPredFlag)
-      pSliceHeadExt->bTCoeffLevelPredFlag    = false;
-    else
-      pSliceHeadExt->bTCoeffLevelPredFlag    = pSubsetSps->sSpsSvcExt.bSeqTCoeffLevelPredFlag;
-
-    if (!pNalHeaderExt->iNoInterLayerPredFlag) {
-      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //slice_skip_flag
-      pSliceHeadExt->bSliceSkipFlag = !!uiCode;
-      if (pSliceHeadExt->bSliceSkipFlag) {
-        WelsLog (pLogCtx, WELS_LOG_WARNING, "bSliceSkipFlag == 1 not supported.");
-        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SLICESKIP);
-      } else {
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_base_mode_flag
-        pSliceHeadExt->bAdaptiveBaseModeFlag = !!uiCode;
-        if (!pSliceHeadExt->bAdaptiveBaseModeFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_base_mode_flag
-          pSliceHeadExt->bDefaultBaseModeFlag = !!uiCode;
-        }
-        if (!pSliceHeadExt->bDefaultBaseModeFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_motion_prediction_flag
-          pSliceHeadExt->bAdaptiveMotionPredFlag = !!uiCode;
-          if (!pSliceHeadExt->bAdaptiveMotionPredFlag) {
-            WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_motion_prediction_flag
-            pSliceHeadExt->bDefaultMotionPredFlag = !!uiCode;
-          }
-        }
-
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_residual_prediction_flag
-        pSliceHeadExt->bAdaptiveResidualPredFlag = !!uiCode;
-        if (!pSliceHeadExt->bAdaptiveResidualPredFlag) {
-          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_residual_prediction_flag
-          pSliceHeadExt->bDefaultResidualPredFlag = !!uiCode;
-        }
-      }
-      if (pSubsetSps->sSpsSvcExt.bAdaptiveTCoeffLevelPredFlag) {
-        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //tcoeff_level_prediction_flag
-        pSliceHeadExt->bTCoeffLevelPredFlag = !!uiCode;
-      }
-    }
-
-    // only the full scan range 0..15 is supported
-    if (!pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
-      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_start
-      pSliceHeadExt->uiScanIdxStart = uiCode;
-      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_end
-      pSliceHeadExt->uiScanIdxEnd = uiCode;
-      if (pSliceHeadExt->uiScanIdxStart != 0 || pSliceHeadExt->uiScanIdxEnd != 15) {
-        WelsLog (pLogCtx, WELS_LOG_WARNING, "uiScanIdxStart (%d) != 0 and uiScanIdxEnd (%d) !=15 not supported here",
-                 pSliceHeadExt->uiScanIdxStart, pSliceHeadExt->uiScanIdxEnd);
-        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
-      }
-    } else {
-      pSliceHeadExt->uiScanIdxStart = 0;
-      pSliceHeadExt->uiScanIdxEnd   = 15;
-    }
-  }
-
-  return ERR_NONE;
-}
-
-/*
- *  Copy relative syntax elements of NALUnitHeaderExt, sRefPicBaseMarking and bStoreRefBasePicFlag in prefix nal unit.
- *  pSrc:   mark as decoded prefix NAL
- *  ppDst:  succeeded VCL NAL based AVC (I/P Slice)
- */
-bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst, PNalUnit const kpSrc) {
-  PNalUnitHeaderExt pNalHdrExtD = NULL, pNalHdrExtS = NULL;
-  PSliceHeaderExt pShExtD = NULL;
-  PPrefixNalUnit pPrefixS = NULL;
-  PSps pSps = NULL;
-  int32_t iIdx = 0;
-
-  if (kppDst == NULL || kpSrc == NULL)
-    return false;
-
-  pNalHdrExtD   = &kppDst->sNalHeaderExt;
-  pNalHdrExtS   = &kpSrc->sNalHeaderExt;
-  pShExtD       = &kppDst->sNalData.sVclNal.sSliceHeaderExt;
-  pPrefixS      = &kpSrc->sNalData.sPrefixNal;
-  pSps          = &pCtx->sSpsBuffer[pCtx->sPpsBuffer[pShExtD->sSliceHeader.iPpsId].iSpsId];
-
-  pNalHdrExtD->uiDependencyId           = pNalHdrExtS->uiDependencyId;
-  pNalHdrExtD->uiQualityId              = pNalHdrExtS->uiQualityId;
-  pNalHdrExtD->uiTemporalId             = pNalHdrExtS->uiTemporalId;
-  pNalHdrExtD->uiPriorityId             = pNalHdrExtS->uiPriorityId;
-  pNalHdrExtD->bIdrFlag                 = pNalHdrExtS->bIdrFlag;
-  pNalHdrExtD->iNoInterLayerPredFlag    = pNalHdrExtS->iNoInterLayerPredFlag;
-  pNalHdrExtD->bDiscardableFlag         = pNalHdrExtS->bDiscardableFlag;
-  pNalHdrExtD->bOutputFlag              = pNalHdrExtS->bOutputFlag;
-  pNalHdrExtD->bUseRefBasePicFlag       = pNalHdrExtS->bUseRefBasePicFlag;
-  pNalHdrExtD->uiLayerDqId              = pNalHdrExtS->uiLayerDqId;
-
-  pShExtD->bStoreRefBasePicFlag         = pPrefixS->bStoreRefBasePicFlag;
-  memcpy (&pShExtD->sRefBasePicMarking, &pPrefixS->sRefPicBaseMarking,
-          sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
-  if (pShExtD->sRefBasePicMarking.bAdaptiveRefBasePicMarkingModeFlag) {
-    PRefBasePicMarking pRefBasePicMarking = &pShExtD->sRefBasePicMarking;
-    iIdx = 0;
-    do {
-      if (pRefBasePicMarking->mmco_base[iIdx].uiMmcoType == MMCO_END)
-        break;
-      if (pRefBasePicMarking->mmco_base[iIdx].uiMmcoType == MMCO_SHORT2UNUSED)
-        pRefBasePicMarking->mmco_base[iIdx].iShortFrameNum = (pShExtD->sSliceHeader.iFrameNum -
-            pRefBasePicMarking->mmco_base[iIdx].uiDiffOfPicNums) & ((1 << pSps->uiLog2MaxFrameNum) - 1);
-      ++ iIdx;
-    } while (iIdx < MAX_MMCO_COUNT);
-  }
-
-  return true;
-}
-
-
-
-int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
-  PAccessUnit pCurAu   = pCtx->pAccessUnitList;
-  int32_t iIdx         = pCurAu->uiEndPos;
-
-  // Conversed iterator
-  pCtx->uiTargetDqId = pCurAu->pNalUnitsList[iIdx]->sNalHeaderExt.uiLayerDqId;
-  pCurAu->uiActualUnitsNum  = iIdx + 1;
-  pCurAu->bCompletedAuFlag = true;
-
-  // Added for mosaic avoidance, 11/19/2009
-#ifdef LONG_TERM_REF
-  if (pCtx->bParamSetsLostFlag || pCtx->bNewSeqBegin)
-#else
-  if (pCtx->bReferenceLostAtT0Flag || pCtx->bNewSeqBegin)
-#endif
-  {
-    uint32_t uiActualIdx = 0;
-    while (uiActualIdx < pCurAu->uiActualUnitsNum) {
-      PNalUnit nal = pCurAu->pNalUnitsList[uiActualIdx];
-
-      if (nal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR || nal->sNalHeaderExt.bIdrFlag) {
-        break;
-      }
-      ++ uiActualIdx;
-    }
-    if (uiActualIdx ==
-        pCurAu->uiActualUnitsNum) { // no found IDR nal within incoming AU, need exit to avoid mosaic issue, 11/19/2009
-
-      pCtx->sDecoderStatistics.uiIDRLostNum++;
-      if (!pCtx->bParamSetsLostFlag)
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-                 "UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU.");
-      pCtx->iErrorCode |= dsRefLost;
-      if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-#ifdef LONG_TERM_REF
-        pCtx->iErrorCode |= dsNoParamSets;
-        return dsNoParamSets;
-#else
-        pCtx->iErrorCode |= dsRefLost;
-        return ERR_INFO_REFERENCE_PIC_LOST;
-#endif
-      }
-    }
-  }
-
-  return ERR_NONE;
-}
-
-int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) {
-  int32_t i = 0;
-
-  WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0))
-  pCtx->sMb.iMbWidth  = (kiMaxWidth + 15) >> 4;
-  pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4;
-
-  if (pCtx->bInitialDqLayersMem && kiMaxWidth <= pCtx->iPicWidthReq
-      && kiMaxHeight <= pCtx->iPicHeightReq) // have same dimension memory, skipped
-    return ERR_NONE;
-
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  UninitialDqLayersContext (pCtx);
-
-  do {
-    PDqLayer pDq = (PDqLayer)pMa->WelsMallocz (sizeof (SDqLayer), "PDqLayer");
-
-    if (pDq == NULL)
-      return ERR_INFO_OUT_OF_MEMORY;
-
-    pCtx->pDqLayersList[i] = pDq; //to keep consistence with in UninitialDqLayersContext()
-    memset (pDq, 0, sizeof (SDqLayer));
-
-    pCtx->sMb.pMbType[i] = (int16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t),
-                           "pCtx->sMb.pMbType[]");
-    pCtx->sMb.pMv[i][0] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
-                            int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMv[][]");
-    pCtx->sMb.pRefIndex[i][0] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
-                                sizeof (
-                                  int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
-    pCtx->sMb.pLumaQp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
-                           "pCtx->sMb.pLumaQp[]");
-    pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
-        sizeof (
-          bool),
-        "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
-    pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
-                                         "pCtx->sMb.pTransformSize8x8Flag[]");
-    pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
-                               int8_t) * 2,
-                             "pCtx->sMb.pChromaQp[]");
-    pCtx->sMb.pMvd[i][0] = (int16_t (*)[16][2])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
-                             int16_t) * MV_A * MB_BLOCK4x4_NUM, "pCtx->sMb.pMvd[][]");
-    pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint16_t),
-                          "pCtx->sMb.pCbfDc[]");
-    pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
-                        "pCtx->sMb.pNzc[]");
-    pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * 24,
-                          "pCtx->sMb.pNzcRs[]");
-    pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
-                                 pCtx->sMb.iMbHeight *
-                                 sizeof (int16_t) * MB_COEFF_LIST_SIZE, "pCtx->sMb.pScaledTCoeff[]");
-    pCtx->sMb.pIntraPredMode[i] = (int8_t (*)[8])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
-                                    int8_t) * 8,
-                                  "pCtx->sMb.pIntraPredMode[]");
-    pCtx->sMb.pIntra4x4FinalMode[i] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (pCtx->sMb.iMbWidth *
-                                      pCtx->sMb.iMbHeight *
-                                      sizeof (int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pIntra4x4FinalMode[]");
-    pCtx->sMb.pIntraNxNAvailFlag[i] = (uint8_t (*))pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
-                                        int8_t),
-                                      "pCtx->sMb.pIntraNxNAvailFlag");
-    pCtx->sMb.pChromaPredMode[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
-                                   "pCtx->sMb.pChromaPredMode[]");
-    pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
-                        "pCtx->sMb.pCbp[]");
-    pCtx->sMb.pSubMbType[i] = (int8_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight *
-                              sizeof (
-                                int8_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]");
-    pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t),
-                             "pCtx->sMb.pSliceIdc[]"); // using int32_t for slice_idc, 4/21/2010
-    pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t),
-                                     "pCtx->sMb.pResidualPredFlag[]");
-    pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
-        int8_t), "pCtx->sMb.pInterPredictionDoneFlag[]");
-
-    pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (
-        bool),
-                                           "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
-    pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (bool),
-                                       "pCtx->pMbRefConcealedFlag[]");
-
-    // check memory block valid due above allocated..
-    WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY,
-                           ((NULL == pCtx->sMb.pMbType[i]) ||
-                            (NULL == pCtx->sMb.pMv[i][0]) ||
-                            (NULL == pCtx->sMb.pRefIndex[i][0]) ||
-                            (NULL == pCtx->sMb.pLumaQp[i]) ||
-                            (NULL == pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) ||
-                            (NULL == pCtx->sMb.pTransformSize8x8Flag[i]) ||
-                            (NULL == pCtx->sMb.pChromaQp[i]) ||
-                            (NULL == pCtx->sMb.pMvd[i][0]) ||
-                            (NULL == pCtx->sMb.pCbfDc[i]) ||
-                            (NULL == pCtx->sMb.pNzc[i]) ||
-                            (NULL == pCtx->sMb.pNzcRs[i]) ||
-                            (NULL == pCtx->sMb.pScaledTCoeff[i]) ||
-                            (NULL == pCtx->sMb.pIntraPredMode[i]) ||
-                            (NULL == pCtx->sMb.pIntra4x4FinalMode[i]) ||
-                            (NULL == pCtx->sMb.pIntraNxNAvailFlag[i]) ||
-                            (NULL == pCtx->sMb.pChromaPredMode[i]) ||
-                            (NULL == pCtx->sMb.pCbp[i]) ||
-                            (NULL == pCtx->sMb.pSubMbType[i]) ||
-                            (NULL == pCtx->sMb.pSliceIdc[i]) ||
-                            (NULL == pCtx->sMb.pResidualPredFlag[i]) ||
-                            (NULL == pCtx->sMb.pInterPredictionDoneFlag[i]) ||
-                            (NULL == pCtx->sMb.pMbRefConcealedFlag[i]) ||
-                            (NULL == pCtx->sMb.pMbCorrectlyDecodedFlag[i])
-                           )
-                          )
-
-    memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
-
-    ++ i;
-  } while (i < LAYER_NUM_EXCHANGEABLE);
-
-  pCtx->bInitialDqLayersMem     = true;
-  pCtx->iPicWidthReq            = kiMaxWidth;
-  pCtx->iPicHeightReq           = kiMaxHeight;
-
-  return ERR_NONE;
-}
-
-void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
-  int32_t i = 0;
-  CMemoryAlign* pMa = pCtx->pMemAlign;
-
-  do {
-    PDqLayer pDq = pCtx->pDqLayersList[i];
-    if (pDq == NULL) {
-      ++ i;
-      continue;
-    }
-
-    if (pCtx->sMb.pMbType[i]) {
-      pMa->WelsFree (pCtx->sMb.pMbType[i], "pCtx->sMb.pMbType[]");
-
-      pCtx->sMb.pMbType[i] = NULL;
-    }
-
-    if (pCtx->sMb.pMv[i][0]) {
-      pMa->WelsFree (pCtx->sMb.pMv[i][0], "pCtx->sMb.pMv[][]");
-
-      pCtx->sMb.pMv[i][0] = NULL;
-    }
-
-    if (pCtx->sMb.pRefIndex[i][0]) {
-      pMa->WelsFree (pCtx->sMb.pRefIndex[i][0], "pCtx->sMb.pRefIndex[][]");
-
-      pCtx->sMb.pRefIndex[i][0] = NULL;
-    }
-
-    if (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) {
-      pMa->WelsFree (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i], "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
-
-      pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pTransformSize8x8Flag[i]) {
-      pMa->WelsFree (pCtx->sMb.pTransformSize8x8Flag[i], "pCtx->sMb.pTransformSize8x8Flag[]");
-
-      pCtx->sMb.pTransformSize8x8Flag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pLumaQp[i]) {
-      pMa->WelsFree (pCtx->sMb.pLumaQp[i], "pCtx->sMb.pLumaQp[]");
-
-      pCtx->sMb.pLumaQp[i] = NULL;
-    }
-
-    if (pCtx->sMb.pChromaQp[i]) {
-      pMa->WelsFree (pCtx->sMb.pChromaQp[i], "pCtx->sMb.pChromaQp[]");
-
-      pCtx->sMb.pChromaQp[i] = NULL;
-    }
-
-    if (pCtx->sMb.pMvd[i][0]) {
-      pMa->WelsFree (pCtx->sMb.pMvd[i][0], "pCtx->sMb.pMvd[][]");
-      pCtx->sMb.pMvd[i][0] = NULL;
-    }
-
-    if (pCtx->sMb.pCbfDc[i]) {
-      pMa->WelsFree (pCtx->sMb.pCbfDc[i], "pCtx->sMb.pCbfDc[]");
-      pCtx->sMb.pCbfDc[i] = NULL;
-    }
-
-    if (pCtx->sMb.pNzc[i]) {
-      pMa->WelsFree (pCtx->sMb.pNzc[i], "pCtx->sMb.pNzc[]");
-
-      pCtx->sMb.pNzc[i] = NULL;
-    }
-
-    if (pCtx->sMb.pNzcRs[i]) {
-      pMa->WelsFree (pCtx->sMb.pNzcRs[i], "pCtx->sMb.pNzcRs[]");
-
-      pCtx->sMb.pNzcRs[i] = NULL;
-    }
-
-    if (pCtx->sMb.pScaledTCoeff[i]) {
-      pMa->WelsFree (pCtx->sMb.pScaledTCoeff[i], "pCtx->sMb.pScaledTCoeff[]");
-
-      pCtx->sMb.pScaledTCoeff[i] = NULL;
-    }
-
-    if (pCtx->sMb.pIntraPredMode[i]) {
-      pMa->WelsFree (pCtx->sMb.pIntraPredMode[i], "pCtx->sMb.pIntraPredMode[]");
-
-      pCtx->sMb.pIntraPredMode[i] = NULL;
-    }
-
-    if (pCtx->sMb.pIntra4x4FinalMode[i]) {
-      pMa->WelsFree (pCtx->sMb.pIntra4x4FinalMode[i], "pCtx->sMb.pIntra4x4FinalMode[]");
-
-      pCtx->sMb.pIntra4x4FinalMode[i] = NULL;
-    }
-
-    if (pCtx->sMb.pIntraNxNAvailFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pIntraNxNAvailFlag[i], "pCtx->sMb.pIntraNxNAvailFlag");
-
-      pCtx->sMb.pIntraNxNAvailFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pChromaPredMode[i]) {
-      pMa->WelsFree (pCtx->sMb.pChromaPredMode[i], "pCtx->sMb.pChromaPredMode[]");
-
-      pCtx->sMb.pChromaPredMode[i] = NULL;
-    }
-
-    if (pCtx->sMb.pCbp[i]) {
-      pMa->WelsFree (pCtx->sMb.pCbp[i], "pCtx->sMb.pCbp[]");
-
-      pCtx->sMb.pCbp[i] = NULL;
-    }
-
-    //      if (pCtx->sMb.pMotionPredFlag[i])
-    //{
-    //  pMa->WelsFree( pCtx->sMb.pMotionPredFlag[i], "pCtx->sMb.pMotionPredFlag[]" );
-
-    //  pCtx->sMb.pMotionPredFlag[i] = NULL;
-    //}
-
-    if (pCtx->sMb.pSubMbType[i]) {
-      pMa->WelsFree (pCtx->sMb.pSubMbType[i], "pCtx->sMb.pSubMbType[]");
-
-      pCtx->sMb.pSubMbType[i] = NULL;
-    }
-
-    if (pCtx->sMb.pSliceIdc[i]) {
-      pMa->WelsFree (pCtx->sMb.pSliceIdc[i], "pCtx->sMb.pSliceIdc[]");
-
-      pCtx->sMb.pSliceIdc[i] = NULL;
-    }
-
-    if (pCtx->sMb.pResidualPredFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pResidualPredFlag[i], "pCtx->sMb.pResidualPredFlag[]");
-
-      pCtx->sMb.pResidualPredFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pInterPredictionDoneFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pInterPredictionDoneFlag[i], "pCtx->sMb.pInterPredictionDoneFlag[]");
-
-      pCtx->sMb.pInterPredictionDoneFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pMbCorrectlyDecodedFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pMbCorrectlyDecodedFlag[i], "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
-      pCtx->sMb.pMbCorrectlyDecodedFlag[i] = NULL;
-    }
-
-    if (pCtx->sMb.pMbRefConcealedFlag[i]) {
-      pMa->WelsFree (pCtx->sMb.pMbRefConcealedFlag[i], "pCtx->sMb.pMbRefConcealedFlag[]");
-      pCtx->sMb.pMbRefConcealedFlag[i] = NULL;
-    }
-    pMa->WelsFree (pDq, "pDq");
-
-    pDq = NULL;
-    pCtx->pDqLayersList[i] = NULL;
-
-    ++ i;
-  } while (i < LAYER_NUM_EXCHANGEABLE);
-
-  pCtx->iPicWidthReq            = 0;
-  pCtx->iPicHeightReq           = 0;
-  pCtx->bInitialDqLayersMem     = false;
-}
-
-void ResetCurrentAccessUnit (PWelsDecoderContext pCtx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  pCurAu->uiStartPos            = 0;
-  pCurAu->uiEndPos              = 0;
-  pCurAu->bCompletedAuFlag      = false;
-  if (pCurAu->uiActualUnitsNum > 0) {
-    uint32_t iIdx = 0;
-    const uint32_t kuiActualNum = pCurAu->uiActualUnitsNum;
-    // a more simpler method to do nal units list management prefered here
-    const uint32_t kuiAvailNum  = pCurAu->uiAvailUnitsNum;
-    const uint32_t kuiLeftNum   = kuiAvailNum - kuiActualNum;
-
-    // Swapping active nal unit nodes of succeeding AU with leading of list
-    while (iIdx < kuiLeftNum) {
-      PNalUnit t = pCurAu->pNalUnitsList[kuiActualNum + iIdx];
-      pCurAu->pNalUnitsList[kuiActualNum + iIdx] = pCurAu->pNalUnitsList[iIdx];
-      pCurAu->pNalUnitsList[iIdx] = t;
-      ++ iIdx;
-    }
-    pCurAu->uiActualUnitsNum = pCurAu->uiAvailUnitsNum = kuiLeftNum;
-  }
-}
-
-/*!
- * \brief   Force reset current Acess Unit Nal list in case error parsing/decoding in current AU
- * \author
- * \history 11/16/2009
- */
-void ForceResetCurrentAccessUnit (PAccessUnit pAu) {
-  uint32_t uiSucAuIdx = pAu->uiEndPos + 1;
-  uint32_t uiCurAuIdx = 0;
-
-  // swap the succeeding AU's nal units to the front
-  while (uiSucAuIdx < pAu->uiAvailUnitsNum) {
-    PNalUnit t = pAu->pNalUnitsList[uiSucAuIdx];
-    pAu->pNalUnitsList[uiSucAuIdx] = pAu->pNalUnitsList[uiCurAuIdx];
-    pAu->pNalUnitsList[uiCurAuIdx] = t;
-    ++ uiSucAuIdx;
-    ++ uiCurAuIdx;
-  }
-
-  // Update avail/actual units num accordingly for next AU parsing
-  if (pAu->uiAvailUnitsNum > pAu->uiEndPos)
-    pAu->uiAvailUnitsNum -= (pAu->uiEndPos + 1);
-  else
-    pAu->uiAvailUnitsNum = 0;
-  pAu->uiActualUnitsNum = 0;
-  pAu->uiStartPos       = 0;
-  pAu->uiEndPos         = 0;
-  pAu->bCompletedAuFlag = false;
-}
-
-//clear current corrupted NAL from pNalUnitsList
-void ForceClearCurrentNal (PAccessUnit pAu) {
-  if (pAu->uiAvailUnitsNum > 0)
-    -- pAu->uiAvailUnitsNum;
-}
-
-void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) {
-  pCtx->bSpsExistAheadFlag = false;
-  pCtx->bSubspsExistAheadFlag = false;
-  pCtx->bPpsExistAheadFlag = false;
-
-  // Force clear the AU list
-  pCtx->pAccessUnitList->uiAvailUnitsNum        = 0;
-  pCtx->pAccessUnitList->uiActualUnitsNum       = 0;
-  pCtx->pAccessUnitList->uiStartPos             = 0;
-  pCtx->pAccessUnitList->uiEndPos               = 0;
-  pCtx->pAccessUnitList->bCompletedAuFlag       = false;
-}
-
-void CheckAvailNalUnitsListContinuity (PWelsDecoderContext pCtx, int32_t iStartIdx, int32_t iEndIdx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-
-  uint8_t uiLastNuDependencyId, uiLastNuLayerDqId;
-  uint8_t uiCurNuDependencyId, uiCurNuQualityId, uiCurNuLayerDqId, uiCurNuRefLayerDqId;
-
-  int32_t iCurNalUnitIdx = 0;
-
-  //check the continuity of pNalUnitsList forwards (from pIdxNoInterLayerPred to end_postion)
-  uiLastNuDependencyId = pCurAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiDependencyId;//starting nal unit
-  uiLastNuLayerDqId   = pCurAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiLayerDqId;//starting nal unit
-  iCurNalUnitIdx = iStartIdx + 1;//current nal unit
-  while (iCurNalUnitIdx <= iEndIdx) {
-    uiCurNuDependencyId   = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiDependencyId;
-    uiCurNuQualityId      = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiQualityId;
-    uiCurNuLayerDqId     = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiLayerDqId;
-    uiCurNuRefLayerDqId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalData.sVclNal.sSliceHeaderExt.uiRefLayerDqId;
-
-    if (uiCurNuDependencyId == uiLastNuDependencyId) {
-      uiLastNuLayerDqId = uiCurNuLayerDqId;
-      ++ iCurNalUnitIdx;
-    } else { //uiCurNuDependencyId != uiLastNuDependencyId, new dependency arrive
-      if (uiCurNuQualityId == 0) {
-        uiLastNuDependencyId = uiCurNuDependencyId;
-        if (uiCurNuRefLayerDqId == uiLastNuLayerDqId) {
-          uiLastNuLayerDqId = uiCurNuLayerDqId;
-          ++ iCurNalUnitIdx;
-        } else { //cur_nu_layer_id != next_nu_ref_layer_dq_id, the chain is broken at this point
-          break;
-        }
-      } else { //new dependency arrive, but no base quality layer, so we must stop in this point
-        break;
-      }
-    }
-  }
-
-  -- iCurNalUnitIdx;
-  pCurAu->uiEndPos = iCurNalUnitIdx;
-  pCtx->uiTargetDqId = pCurAu->pNalUnitsList[iCurNalUnitIdx]->sNalHeaderExt.uiLayerDqId;
-}
-
-//main purpose: to support multi-slice and to include all slice which have the same uiDependencyId, uiQualityId and frame_num
-//for single slice, pIdxNoInterLayerPred SHOULD NOT be modified
-void RefineIdxNoInterLayerPred (PAccessUnit pCurAu, int32_t* pIdxNoInterLayerPred) {
-  int32_t iLastNalDependId  = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiDependencyId;
-  int32_t iLastNalQualityId = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiQualityId;
-  uint8_t uiLastNalTId       = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalHeaderExt.uiTemporalId;
-  int32_t iLastNalFrameNum  =
-    pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
-  int32_t iLastNalPoc        =
-    pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-  int32_t iLastNalFirstMb   =
-    pCurAu->pNalUnitsList[*pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-  int32_t iCurNalDependId, iCurNalQualityId, iCurNalTId, iCurNalFrameNum, iCurNalPoc, iCurNalFirstMb, iCurIdx,
-          iFinalIdxNoInterLayerPred;
-
-  bool  bMultiSliceFind = false;
-
-  iFinalIdxNoInterLayerPred = 0;
-  iCurIdx = *pIdxNoInterLayerPred - 1;
-  while (iCurIdx >= 0) {
-    if (pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.iNoInterLayerPredFlag) {
-      iCurNalDependId  = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId;
-      iCurNalQualityId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId;
-      iCurNalTId       = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId;
-      iCurNalFrameNum  = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
-      iCurNalPoc        = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-      iCurNalFirstMb   = pCurAu->pNalUnitsList[iCurIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-
-      if (iCurNalDependId == iLastNalDependId  &&
-          iCurNalQualityId == iLastNalQualityId &&
-          iCurNalTId       == uiLastNalTId       &&
-          iCurNalFrameNum  == iLastNalFrameNum  &&
-          iCurNalPoc        == iLastNalPoc        &&
-          iCurNalFirstMb   != iLastNalFirstMb) {
-        bMultiSliceFind = true;
-        iFinalIdxNoInterLayerPred = iCurIdx;
-        --iCurIdx;
-        continue;
-      } else {
-        break;
-      }
-    }
-    --iCurIdx;
-  }
-
-  if (bMultiSliceFind && *pIdxNoInterLayerPred != iFinalIdxNoInterLayerPred) {
-    *pIdxNoInterLayerPred = iFinalIdxNoInterLayerPred;
-  }
-}
-
-bool CheckPocOfCurValidNalUnits (PAccessUnit pCurAu, int32_t pIdxNoInterLayerPred) {
-  int32_t iEndIdx    = pCurAu->uiEndPos;
-  int32_t iCurAuPoc =
-    pCurAu->pNalUnitsList[pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-  int32_t iTmpPoc, i;
-  for (i = pIdxNoInterLayerPred + 1; i < iEndIdx; i++) {
-    iTmpPoc = pCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
-    if (iTmpPoc != iCurAuPoc) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-bool CheckIntegrityNalUnitsList (PWelsDecoderContext pCtx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  const int32_t kiEndPos = pCurAu->uiEndPos;
-  int32_t iIdxNoInterLayerPred = 0;
-
-  if (!pCurAu->bCompletedAuFlag)
-    return false;
-
-  if (pCtx->bNewSeqBegin) {
-    pCurAu->uiStartPos = 0;
-    //step1: search the pNalUnit whose iNoInterLayerPredFlag equal to 1 backwards (from uiEndPos to 0)
-    iIdxNoInterLayerPred = kiEndPos;
-    while (iIdxNoInterLayerPred >= 0) {
-      if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-        break;
-      }
-      --iIdxNoInterLayerPred;
-    }
-    if (iIdxNoInterLayerPred < 0) {
-      //can not find the Nal Unit whose no_inter_pred_falg equal to 1, MUST STOP decode
-      return false;
-    }
-
-    //step2: support multi-slice, to include all base layer slice
-    RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-    pCurAu->uiStartPos = iIdxNoInterLayerPred;
-    CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
-
-    if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-      return false;
-    }
-
-    pCtx->iCurSeqIntervalTargetDependId = pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalHeaderExt.uiDependencyId;
-    pCtx->iCurSeqIntervalMaxPicWidth  =
-      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbWidth << 4;
-    pCtx->iCurSeqIntervalMaxPicHeight =
-      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbHeight << 4;
-  } else { //P_SLICE
-    //step 1: search uiDependencyId equal to pCtx->cur_seq_interval_target_dependency_id
-    bool bGetDependId = false;
-    int32_t iIdxDependId = 0;
-
-    iIdxDependId = kiEndPos;
-    while (iIdxDependId >= 0) {
-      if (pCtx->iCurSeqIntervalTargetDependId == pCurAu->pNalUnitsList[iIdxDependId]->sNalHeaderExt.uiDependencyId) {
-        bGetDependId = true;
-        break;
-      } else {
-        --iIdxDependId;
-      }
-    }
-
-    //step 2: switch according to whether or not find the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId
-    if (bGetDependId) { //get the index of pNalUnit whose uiDependencyId equal to iCurSeqIntervalTargetDependId
-      bool bGetNoInterPredFront = false;
-      //step 2a: search iNoInterLayerPredFlag [0....iIdxDependId]
-      iIdxNoInterLayerPred = iIdxDependId;
-      while (iIdxNoInterLayerPred >= 0) {
-        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-          bGetNoInterPredFront = true;
-          break;
-        }
-        --iIdxNoInterLayerPred;
-      }
-      //step 2b: switch, whether or not find the NAL unit whose no_inter_pred_flag equal to 1 among [0....iIdxDependId]
-      if (bGetNoInterPredFront) { //YES
-        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-        pCurAu->uiStartPos = iIdxNoInterLayerPred;
-        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, iIdxDependId);
-
-        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-          return false;
-        }
-      } else { //NO, should find the NAL unit whose no_inter_pred_flag equal to 1 among [iIdxDependId....uiEndPos]
-        iIdxNoInterLayerPred = iIdxDependId;
-        while (iIdxNoInterLayerPred <= kiEndPos) {
-          if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-            break;
-          }
-          ++iIdxNoInterLayerPred;
-        }
-
-        if (iIdxNoInterLayerPred > kiEndPos) {
-          return false; //cann't find the index of pNalUnit whose no_inter_pred_flag = 1
-        }
-
-        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-        pCurAu->uiStartPos = iIdxNoInterLayerPred;
-        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
-
-        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-          return false;
-        }
-      }
-    } else { //without the index of pNalUnit, should process this AU as common case
-      iIdxNoInterLayerPred = kiEndPos;
-      while (iIdxNoInterLayerPred >= 0) {
-        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
-          break;
-        }
-        --iIdxNoInterLayerPred;
-      }
-      if (iIdxNoInterLayerPred < 0) {
-        return false; //cann't find the index of pNalUnit whose iNoInterLayerPredFlag = 1
-      }
-
-      RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
-      pCurAu->uiStartPos = iIdxNoInterLayerPred;
-      CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
-
-      if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-void CheckOnlyOneLayerInAu (PWelsDecoderContext pCtx) {
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-
-  int32_t iEndIdx = pCurAu->uiEndPos;
-  int32_t iCurIdx = pCurAu->uiStartPos;
-  uint8_t uiDId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId;
-  uint8_t uiQId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId;
-  uint8_t uiTId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId;
-
-  uint8_t uiCurDId, uiCurQId, uiCurTId;
-
-  pCtx->bOnlyOneLayerInCurAuFlag = true;
-
-  if (iEndIdx == iCurIdx) { //only one NAL in pNalUnitsList
-    return;
-  }
-
-  ++iCurIdx;
-  while (iCurIdx <= iEndIdx) {
-    uiCurDId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiDependencyId;
-    uiCurQId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiQualityId;
-    uiCurTId = pCurAu->pNalUnitsList[iCurIdx]->sNalHeaderExt.uiTemporalId;
-
-    if (uiDId != uiCurDId || uiQId != uiCurQId || uiTId != uiCurTId) {
-      pCtx->bOnlyOneLayerInCurAuFlag = false;
-      return;
-    }
-
-    ++iCurIdx;
-  }
-}
-
-int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) {
-  // Roll back NAL units not being belong to current access unit list for proceeded access unit
-  int32_t iRet = UpdateAccessUnit (pCtx);
-  if (iRet != ERR_NONE)
-    return iRet;
-
-  pCtx->pAccessUnitList->uiStartPos = 0;
-  if (!pCtx->bAvcBasedFlag && !CheckIntegrityNalUnitsList (pCtx)) {
-    pCtx->iErrorCode |= dsBitstreamError;
-    return dsBitstreamError;
-  }
-
-  //check current AU has only one layer or not
-  //If YES, can use deblocking based on AVC
-  if (!pCtx->bAvcBasedFlag) {
-    CheckOnlyOneLayerInAu (pCtx);
-  }
-
-  return ERR_NONE;
-}
-
-void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) {
-  //save previous header info
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  PNalUnit pCurNal = pCurAu->pNalUnitsList[pCurAu->uiEndPos];
-  memcpy (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
-  memcpy (&pCtx->sLastSliceHeader,
-          &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader, sizeof (SSliceHeader));
-  // uninitialize context of current access unit and rbsp buffer clean
-  ResetCurrentAccessUnit (pCtx);
-}
-
-/* CheckNewSeqBeginAndUpdateActiveLayerSps
- * Collect, per dependency id, the SPS pointer referenced by the NAL units of the current access unit,
- * compare the result with pCtx->pActiveLayerSps[] and update that table.
- * A new sequence is reported when any NAL in the AU is an IDR (by NAL type or bIdrFlag), or when the
- * highest populated layer index or the SPS pointer at that index differs from the active table.
- * parameter:
- *  pCtx:       decoder context; pActiveLayerSps[] is updated in place
- * return:
- * true - the AU to be constructed is the start of a new sequence; false - not
- */
-static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
-  bool bNewSeq = false;
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  PSps pTmpLayerSps[MAX_LAYER_NUM]; // SPS seen in this AU, indexed by dependency id
-  for (int i = 0; i < MAX_LAYER_NUM; i++) {
-    pTmpLayerSps[i] = NULL;
-  }
-  // track the layer sps for the current au
-  for (unsigned int i = pCurAu->uiStartPos; i <= pCurAu->uiEndPos; i++) {
-    uint32_t uiDid = pCurAu->pNalUnitsList[i]->sNalHeaderExt.uiDependencyId;
-    // NOTE(review): uiDid is not range-checked against MAX_LAYER_NUM here — confirm the parser bounds it
-    pTmpLayerSps[uiDid] = pCurAu->pNalUnitsList[i]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
-    if ((pCurAu->pNalUnitsList[i]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR)
-        || (pCurAu->pNalUnitsList[i]->sNalHeaderExt.bIdrFlag))
-      bNewSeq = true;
-  }
-  // highest populated index of each table; both stay 0 when the corresponding table is entirely NULL
-  int iMaxActiveLayer = 0, iMaxCurrentLayer = 0;
-  for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
-    if (pCtx->pActiveLayerSps[i] != NULL) {
-      iMaxActiveLayer = i;
-      break;
-    }
-  }
-  for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
-    if (pTmpLayerSps[i] != NULL) {
-      iMaxCurrentLayer = i;
-      break;
-    }
-  }
-  // a different top layer, or a different SPS pointer at the top layer, also counts as a new sequence
-  if ((iMaxCurrentLayer != iMaxActiveLayer)
-      || (pTmpLayerSps[iMaxCurrentLayer]  != pCtx->pActiveLayerSps[iMaxActiveLayer])) {
-    bNewSeq = true;
-  }
-  // fill active sps if the current sps is not null while active layer is null
-  if (!bNewSeq) {
-    for (int i = 0; i < MAX_LAYER_NUM; i++) {
-      if (pCtx->pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) {
-        pCtx->pActiveLayerSps[i] = pTmpLayerSps[i];
-      }
-    }
-  } else {
-    // UpdateActiveLayerSps if new sequence start: the whole table is replaced, NULL entries included
-    memcpy (&pCtx->pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
-  }
-  return bNewSeq;
-}
-/*
- * WriteBackActiveParameters
- * For every OVERWRITE_* bit set in pCtx->iOverwriteFlags, copy the parameter set kept in the extra slot
- * (index MAX_PPS_COUNT / MAX_SPS_COUNT) of the matching buffer into the slot selected by its own id.
- * Overwriting an SPS or a subset SPS also sets pCtx->bNewSeqBegin. The flags are cleared afterwards.
- * parameter:
- *  pCtx:       decoder context holding sPpsBuffer, sSpsBuffer, sSubsetSpsBuffer and iOverwriteFlags
- */
-static void WriteBackActiveParameters (PWelsDecoderContext pCtx) {
-  if (pCtx->iOverwriteFlags & OVERWRITE_PPS) {
-    memcpy (&pCtx->sPpsBuffer[pCtx->sPpsBuffer[MAX_PPS_COUNT].iPpsId], &pCtx->sPpsBuffer[MAX_PPS_COUNT], sizeof (SPps));
-  }
-  if (pCtx->iOverwriteFlags & OVERWRITE_SPS) {
-    memcpy (&pCtx->sSpsBuffer[pCtx->sSpsBuffer[MAX_SPS_COUNT].iSpsId], &pCtx->sSpsBuffer[MAX_SPS_COUNT], sizeof (SSps));
-    pCtx->bNewSeqBegin = true;
-  }
-  if (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) {
-    memcpy (&pCtx->sSubsetSpsBuffer[pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId],
-            &pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT], sizeof (SSubsetSps));
-    pCtx->bNewSeqBegin = true;
-  }
-  pCtx->iOverwriteFlags = OVERWRITE_NONE;
-}
-
-/*
- * DecodeFinishUpdate
- * decoder finish decoding, update active parameter sets and new seq status
- *
- * bNewSeqBegin is cleared first, may be set again by WriteBackActiveParameters() (SPS / subset SPS
- * overwrite), and is finally OR-ed with bNextNewSeqBegin, which is then reset. When the result is true
- * ResetActiveSPSForEachLayer() is called.
- * parameter:
- *  pCtx:       decoder context
- */
-
-void DecodeFinishUpdate (PWelsDecoderContext pCtx) {
-  pCtx->bNewSeqBegin = false;
-  WriteBackActiveParameters (pCtx);
-  pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin;
-  pCtx->bNextNewSeqBegin = false; // reset it
-  if (pCtx->bNewSeqBegin)
-    ResetActiveSPSForEachLayer (pCtx);
-}
-
-/*
- * ConstructAccessUnit
- * construct an access unit from the NAL units collected in pCtx->pAccessUnitList and decode it:
- * detect a new sequence, run WelsDecodeAccessUnitStart(), select the active SPS/PPS from the first NAL,
- * re-sync the picture resolution on a new sequence, then call DecodeCurrentAccessUnit() and
- * WelsDecodeAccessUnitEnd().
- * parameter\
- *  pCtx:       decoder context
- *  ppDst:      forwarded unchanged to DecodeCurrentAccessUnit()
- *  pDstInfo:   forwarded to DecodeCurrentAccessUnit(); iBufferStatus is set to 0 when the AU start
- *              fails and bParseOnly is not set
- * return:
- *  0 - success; otherwise returned error_no defined in error_no.h
- */
-int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  int32_t iErr;
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-  pCtx->bAuReadyFlag = false;
-  pCtx->bLastHasMmco5 = false;
-  bool bTmpNewSeqBegin = CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx);
-  pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || bTmpNewSeqBegin; // keep a new-sequence flag that was already pending
-  iErr = WelsDecodeAccessUnitStart (pCtx);
-  GetVclNalTemporalId (pCtx); // NOTE(review): runs before iErr is checked, i.e. also when the AU start failed — confirm intended
-
-  if (ERR_NONE != iErr) {
-    ForceResetCurrentAccessUnit (pCtx->pAccessUnitList);
-    if (!pCtx->pParam->bParseOnly)
-      pDstInfo->iBufferStatus = 0;
-    pCtx->bNewSeqBegin = pCtx->bNewSeqBegin || pCtx->bNextNewSeqBegin;
-    pCtx->bNextNewSeqBegin = false; // reset it
-    if (pCtx->bNewSeqBegin)
-      ResetActiveSPSForEachLayer (pCtx);
-    return iErr;
-  }
-
-  // the SPS/PPS of the first NAL of the AU become the active parameter sets
-  pCtx->pSps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
-  pCtx->pPps = pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps;
-
-  //try to allocate or relocate DPB memory only when new sequence is coming.
-  if (pCtx->bNewSeqBegin) {
-    WelsResetRefPic (pCtx); //clear ref pPic when IDR NAL
-    iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
-
-    if (ERR_NONE != iErr) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed,  the error is %d", iErr);
-      return iErr; // NOTE(review): leaves without WelsDecodeAccessUnitEnd()/ForceResetCurrentAccessUnit() — confirm the caller resets the AU
-    }
-  }
-
-  iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo);
-
-  WelsDecodeAccessUnitEnd (pCtx); // called whether or not decoding succeeded
-
-  if (ERR_NONE != iErr) {
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "returned error from decoding:[0x%x]", iErr);
-    return iErr;
-  }
-
-  return ERR_NONE;
-}
-/*
- * InitDqLayerInfo
- * Fill a dq layer from the layer info and the headers of one NAL unit.
- * parameter:
- *  pDqLayer:   layer to initialize
- *  pLayerInfo: copied as a whole into pDqLayer->sLayerInfo; pLayerInfo->pPps supplies the PPS id
- *  pNalUnit:   NAL whose header extension and slice header (extension) supply the syntax elements
- *  pPicDec:    picture stored in pDqLayer->pDec
- * The reordering / marking / weight table pointers are only (re)assigned when the NAL's quality id
- * equals BASE_QUALITY_ID; they point into pNalUnit, not into copies.
- */
-static inline void InitDqLayerInfo (PDqLayer pDqLayer, PLayerInfo pLayerInfo, PNalUnit pNalUnit, PPicture pPicDec) {
-  PNalUnitHeaderExt pNalHdrExt    = &pNalUnit->sNalHeaderExt;
-  PSliceHeaderExt pShExt          = &pNalUnit->sNalData.sVclNal.sSliceHeaderExt;
-  PSliceHeader pSh                = &pShExt->sSliceHeader;
-  const uint8_t kuiQualityId      = pNalHdrExt->uiQualityId;
-
-  memcpy (&pDqLayer->sLayerInfo, pLayerInfo, sizeof (SLayerInfo)); //confirmed_safe_unsafe_usage
-
-  pDqLayer->pDec        = pPicDec;
-  pDqLayer->iMbWidth    = pSh->iMbWidth;        // MB width of this picture
-  pDqLayer->iMbHeight   = pSh->iMbHeight;// MB height of this picture
-
-  // packed as: first MB of the slice << 7 | dependency id << 4 | quality id
-  pDqLayer->iSliceIdcBackup = (pSh->iFirstMbInSlice << 7) | (pNalHdrExt->uiDependencyId << 4) | (pNalHdrExt->uiQualityId);
-
-  /* Common syntax elements across all slices of a DQLayer */
-  pDqLayer->uiPpsId                                     = pLayerInfo->pPps->iPpsId;
-  pDqLayer->uiDisableInterLayerDeblockingFilterIdc      = pShExt->uiDisableInterLayerDeblockingFilterIdc;
-  pDqLayer->iInterLayerSliceAlphaC0Offset               = pShExt->iInterLayerSliceAlphaC0Offset;
-  pDqLayer->iInterLayerSliceBetaOffset                  = pShExt->iInterLayerSliceBetaOffset;
-  pDqLayer->iSliceGroupChangeCycle                      = pSh->iSliceGroupChangeCycle;
-  pDqLayer->bStoreRefBasePicFlag                        = pShExt->bStoreRefBasePicFlag;
-  pDqLayer->bTCoeffLevelPredFlag                        = pShExt->bTCoeffLevelPredFlag;
-  pDqLayer->bConstrainedIntraResamplingFlag             = pShExt->bConstrainedIntraResamplingFlag;
-  pDqLayer->uiRefLayerDqId                              = pShExt->uiRefLayerDqId;
-  pDqLayer->uiRefLayerChromaPhaseXPlus1Flag             = pShExt->uiRefLayerChromaPhaseXPlus1Flag;
-  pDqLayer->uiRefLayerChromaPhaseYPlus1                 = pShExt->uiRefLayerChromaPhaseYPlus1;
-  //memcpy(&pDqLayer->sScaledRefLayer, &pShExt->sScaledRefLayer, sizeof(SPosOffset));//confirmed_safe_unsafe_usage
-
-  if (kuiQualityId == BASE_QUALITY_ID) {
-    pDqLayer->pRefPicListReordering     = &pSh->pRefPicListReordering;
-    pDqLayer->pRefPicMarking            = &pSh->sRefMarking;
-
-    if (pSh->pPps->bWeightedPredFlag) {
-      pDqLayer->bUseWeightPredictionFlag = true;
-      pDqLayer->pPredWeightTable    = &pSh->sPredWeightTable;
-
-    } else
-      pDqLayer->bUseWeightPredictionFlag = false; // pPredWeightTable is left untouched in this case
-
-    pDqLayer->pRefPicBaseMarking        = &pShExt->sRefBasePicMarking;
-  }
-
-  pDqLayer->uiLayerDqId                 = pNalHdrExt->uiLayerDqId;      // dq_id of current layer
-  pDqLayer->bUseRefBasePicFlag          = pNalHdrExt->bUseRefBasePicFlag;
-}
-/*
- * WelsDqLayerDecodeStart
- * Publish the slice header of pCurNal in the context (eSliceType, pSliceHeader, iFrameNum) and update
- * the decoder statistics for the active parameter sets.
- * parameter:
- *  pCtx:       decoder context
- *  pCurNal:    NAL unit whose slice header becomes the current one
- *  pSps, pPps: parameter sets passed on to UpdateDecoderStatisticsForActiveParaset()
- */
-void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pSps, PPps pPps) {
-  PSliceHeader pSh = &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
-
-  pCtx->eSliceType   = pSh->eSliceType;
-  pCtx->pSliceHeader = pSh;
-
-  pCtx->iFrameNum    = pSh->iFrameNum;
-
-  UpdateDecoderStatisticsForActiveParaset (& (pCtx->sDecoderStatistics),
-      pSps, pPps);
-}
-/*
- * InitRefPicList
- * Build the reference picture list via WelsInitRefList() and, for slices that are neither I nor SI,
- * reorder it via WelsReorderRefList().
- * parameter:
- *  pCtx:       decoder context; pCtx->eSliceType selects whether reordering runs
- *  kuiNRi:     not used inside this function
- *  iPoc:       passed to WelsInitRefList()
- * return:
- *  result of WelsReorderRefList() when it runs, otherwise the result of WelsInitRefList()
- */
-int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
-  int32_t iRet = ERR_NONE;
-  iRet = WelsInitRefList (pCtx, iPoc);
-  if ((pCtx->eSliceType != I_SLICE && pCtx->eSliceType != SI_SLICE)) {
-    iRet = WelsReorderRefList (pCtx); // NOTE(review): overwrites the WelsInitRefList() result, so an init failure is dropped here — confirm intended
-  }
-
-  return iRet;
-}
-/*
- * InitCurDqLayerData
- * Point the per-macroblock data pointers of pCurDq at entry 0 of the corresponding pCtx->sMb arrays
- * (entry [0][0] for pMv, pRefIndex and pMvd, which are assigned to slot [0] of the layer).
- * Nothing is done when either argument is NULL.
- * parameter:
- *  pCtx:       decoder context owning the sMb buffers
- *  pCurDq:     dq layer whose pointers are set
- */
-void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
-  if (NULL != pCtx && NULL != pCurDq) {
-    pCurDq->pMbType         = pCtx->sMb.pMbType[0];
-    pCurDq->pSliceIdc       = pCtx->sMb.pSliceIdc[0];
-    pCurDq->pMv[0]          = pCtx->sMb.pMv[0][0];
-    pCurDq->pRefIndex[0]    = pCtx->sMb.pRefIndex[0][0];
-    pCurDq->pNoSubMbPartSizeLessThan8x8Flag = pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[0];
-    pCurDq->pTransformSize8x8Flag = pCtx->sMb.pTransformSize8x8Flag[0];
-    pCurDq->pLumaQp         = pCtx->sMb.pLumaQp[0];
-    pCurDq->pChromaQp       = pCtx->sMb.pChromaQp[0];
-    pCurDq->pMvd[0]         = pCtx->sMb.pMvd[0][0];
-    pCurDq->pCbfDc          = pCtx->sMb.pCbfDc[0];
-    pCurDq->pNzc            = pCtx->sMb.pNzc[0];
-    pCurDq->pNzcRs          = pCtx->sMb.pNzcRs[0];
-    pCurDq->pScaledTCoeff   = pCtx->sMb.pScaledTCoeff[0];
-    pCurDq->pIntraPredMode  = pCtx->sMb.pIntraPredMode[0];
-    pCurDq->pIntra4x4FinalMode = pCtx->sMb.pIntra4x4FinalMode[0];
-    pCurDq->pIntraNxNAvailFlag = pCtx->sMb.pIntraNxNAvailFlag[0];
-    pCurDq->pChromaPredMode = pCtx->sMb.pChromaPredMode[0];
-    pCurDq->pCbp            = pCtx->sMb.pCbp[0];
-    pCurDq->pSubMbType      = pCtx->sMb.pSubMbType[0];
-    pCurDq->pInterPredictionDoneFlag = pCtx->sMb.pInterPredictionDoneFlag[0];
-    pCurDq->pResidualPredFlag = pCtx->sMb.pResidualPredFlag[0];
-    pCurDq->pMbCorrectlyDecodedFlag = pCtx->sMb.pMbCorrectlyDecodedFlag[0];
-    pCurDq->pMbRefConcealedFlag = pCtx->sMb.pMbRefConcealedFlag[0];
-  }
-}
-
-/*
- * DecodeCurrentAccessUnit
- * Decode current access unit when current AU is completed.
- *
- * The outer loop runs once per dq layer, the inner loop once per slice NAL of that layer. A layer ends
- * when the next NAL has a different dependency id or quality id, or when the NAL list is exhausted.
- * When the decoded layer is the target layer the frame is constructed, optionally concealed, marked as
- * reference (uiNalRefIdc > 0) and pCtx->pDec is released.
- * parameter:
- *  pCtx:       decoder context
- *  ppDst:      forwarded to DecodeFrameConstruction()
- *  pDstInfo:   forwarded to DecodeFrameConstruction()
- * return:
- *  ERR_NONE - success; otherwise the first error met: ERR_INFO_REF_COUNT_OVERFLOW when no picture buffer
- *  can be fetched, a GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ...) code for FMO init failure or lost
- *  references, or the code returned by InitRefPicList(), WelsDecodeSlice(), WelsDecodeConstructSlice(),
- *  DecodeFrameConstruction() or WelsMarkAsRef()
- */
-int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  int32_t iRefCount[LIST_A]; // written below via ST64 but not read again in this function
-  PNalUnit pNalCur = NULL;
-  PAccessUnit pCurAu = pCtx->pAccessUnitList;
-
-  int32_t iIdx = pCurAu->uiStartPos;
-  int32_t iEndIdx = pCurAu->uiEndPos;
-
-  int32_t iPpsId = 0;
-  int32_t iRet = ERR_NONE;
-
-  bool bAllRefComplete = true; // Assume by default that all ref pictures are complete
-
-  const uint8_t kuiTargetLayerDqId = GetTargetDqId (pCtx->uiTargetDqId, pCtx->pParam);
-  const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4; // bits 4-6 of the target dq id
-  int16_t iLastIdD = -1, iLastIdQ = -1; // -1: no slice handled yet
-  int16_t iCurrIdD = 0, iCurrIdQ = 0;
-  uint8_t uiNalRefIdc = 0;
-  bool bFreshSliceAvailable =
-    true; // Another fresh slice coming up for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008
-
-  //update pCurDqLayer at the starting of AU decoding
-  if (pCtx->bInitialDqLayersMem) {
-    pCtx->pCurDqLayer = pCtx->pDqLayersList[0];
-  }
-
-  InitCurDqLayerData (pCtx, pCtx->pCurDqLayer);
-
-  pNalCur = pCurAu->pNalUnitsList[iIdx];
-  while (iIdx <= iEndIdx) { // one iteration per dq layer
-    PDqLayer dq_cur = pCtx->pCurDqLayer;
-    SLayerInfo pLayerInfo; // a struct despite the p prefix; zeroed by the memset below
-    PSliceHeaderExt pShExt = NULL;
-    PSliceHeader pSh = NULL;
-
-    if (pCtx->pDec == NULL) { // no picture in progress: fetch a buffer to decode into
-      pCtx->pDec = PrefetchPic (pCtx->pPicBuff[0]);
-      if (pCtx->iTotalNumMbRec != 0)
-        pCtx->iTotalNumMbRec = 0;
-
-      if (NULL == pCtx->pDec) {
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
-                 "DecodeCurrentAccessUnit()::::::PrefetchPic ERROR, pSps->iNumRefFrames:%d.",
-                 pCtx->pSps->iNumRefFrames);
-        // The error code here need to be separated from the dsOutOfMemory
-        pCtx->iErrorCode |= dsOutOfMemory;
-        return ERR_INFO_REF_COUNT_OVERFLOW;
-      }
-      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
-    } else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start
-      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
-    }
-    pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp;
-
-    if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode
-      // reset the per-MB slice idc tables to 0xff bytes and clear the per-MB status flags
-      for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i)
-        memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
-      memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
-      memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
-      pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
-      pCtx->pDec->iMbEcedNum = 0;
-      pCtx->pDec->iMbEcedPropNum = 0;
-    }
-    pCtx->bRPLRError = false;
-    GetI4LumaIChromaAddrTable (pCtx->iDecBlockOffsetArray, pCtx->pDec->iLinesize[0], pCtx->pDec->iLinesize[1]);
-
-    if (pNalCur->sNalHeaderExt.uiLayerDqId > kuiTargetLayerDqId) { // confirmed pNalCur will never be NULL
-      break; // For performance there is no need to decode the remaining bits beyond the required uiLayerDqId, 9/2/2009
-    }
-
-    memset (&pLayerInfo, 0, sizeof (SLayerInfo));
-
-    /*
-     *  Loop decoding for slices (even FMO and/ multiple slices) within a dq layer
-     */
-    while (iIdx <= iEndIdx) {
-      bool         bReconstructSlice;
-      iCurrIdQ  = pNalCur->sNalHeaderExt.uiQualityId;
-      iCurrIdD  = pNalCur->sNalHeaderExt.uiDependencyId;
-      pSh       = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
-      pShExt    = &pNalCur->sNalData.sVclNal.sSliceHeaderExt;
-      pCtx->bRPLRError = false;
-      bReconstructSlice = CheckSliceNeedReconstruct (pNalCur->sNalHeaderExt.uiLayerDqId, kuiTargetLayerDqId);
-
-      memcpy (&pLayerInfo.sNalHeaderExt, &pNalCur->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); //confirmed_safe_unsafe_usage
-
-      pCtx->pDec->iFrameNum = pSh->iFrameNum;
-      pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2
-      pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag;
-
-      memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage
-      pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag      = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag;
-      pLayerInfo.sSliceInLayer.eSliceType               = pSh->eSliceType;
-      pLayerInfo.sSliceInLayer.iLastMbQp                = pSh->iSliceQp;
-      dq_cur->pBitStringAux = &pNalCur->sNalData.sVclNal.sSliceBitsRead;
-
-      uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
-
-      iPpsId = pSh->iPpsId;
-
-      pLayerInfo.pPps = pSh->pPps;
-      pLayerInfo.pSps = pSh->pSps;
-      pLayerInfo.pSubsetSps = pShExt->pSubsetSps;
-
-      pCtx->pFmo = &pCtx->sFmoList[iPpsId]; // the FMO entry is selected by the PPS id of the slice
-      iRet = FmoParamUpdate (pCtx->pFmo, pLayerInfo.pSps, pLayerInfo.pPps, &pCtx->iActiveFmoNum, pCtx->pMemAlign);
-      if (ERR_NONE != iRet) {
-        if (iRet == ERR_INFO_OUT_OF_MEMORY) {
-          pCtx->iErrorCode |= dsOutOfMemory;
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "DecodeCurrentAccessUnit(), Fmo param alloc failed");
-        } else {
-          pCtx->iErrorCode |= dsBitstreamError;
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DecodeCurrentAccessUnit(), FmoParamUpdate failed, eSliceType: %d.",
-                   pSh->eSliceType);
-        }
-        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_FMO_INIT_FAIL);
-      }
-
-      bFreshSliceAvailable = (iCurrIdD != iLastIdD
-                              || iCurrIdQ != iLastIdQ);        // do not need condition of (first_mb == 0) due multiple slices might be disorder
-
-      WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps);
-
-      if (iCurrIdQ == BASE_QUALITY_ID) {
-        ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount));
-      }
-
-      // a slice is only decoded when its dependency id matches the layer being processed
-      if ((iLastIdD < 0) ||  //case 1: first layer
-          (iLastIdD == iCurrIdD)) { //case 2: same uiDId
-        InitDqLayerInfo (dq_cur, &pLayerInfo, pNalCur, pCtx->pDec);
-
-        if (!dq_cur->sLayerInfo.pSps->bGapsInFrameNumValueAllowedFlag) {
-          const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag
-                                 || (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR);
-          // Subclause 8.2.5.2 Decoding process for gaps in frame_num
-          // a gap is flagged when frame_num is neither the previous value nor previous + 1 (mod 2^uiLog2MaxFrameNum)
-          if (!kbIdrFlag  &&
-              pSh->iFrameNum != pCtx->iPrevFrameNum &&
-              pSh->iFrameNum != ((pCtx->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - 1))) {
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-                     "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", pCtx->iPrevFrameNum,
-                     pSh->iFrameNum);
-
-            bAllRefComplete = false;
-            pCtx->iErrorCode |= dsRefLost;
-            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-#ifdef LONG_TERM_REF
-              pCtx->bParamSetsLostFlag = true;
-#else
-              pCtx->bReferenceLostAtT0Flag = true;
-#endif
-              return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REFERENCE_PIC_LOST);
-            }
-          }
-        }
-
-        // reference lists are only built for the base quality slice of the target dependency layer
-        if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) {
-          iRet = InitRefPicList (pCtx, uiNalRefIdc, pSh->iPicOrderCntLsb);
-          if (iRet) {
-            pCtx->bRPLRError = true;
-            bAllRefComplete = false; // RPLR error, set ref pictures complete flag false
-            HandleReferenceLost (pCtx, pNalCur);
-            WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
-                     "reference picture introduced by this frame is lost during transmission! uiTId: %d",
-                     pNalCur->sNalHeaderExt.uiTemporalId);
-            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-              if (pCtx->iTotalNumMbRec == 0) // nothing reconstructed yet: drop the picture being decoded
-                pCtx->pDec = NULL;
-              return iRet;
-            }
-          }
-        }
-
-        iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
-
-        //Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case
-        if (iRet != ERR_NONE) {
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
-                   "DecodeCurrentAccessUnit() failed (%d) in frame: %d uiDId: %d uiQId: %d",
-                   iRet, pSh->iFrameNum, iCurrIdD, iCurrIdQ);
-          bAllRefComplete = false;
-          HandleReferenceLostL0 (pCtx, pNalCur);
-          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-            if (pCtx->iTotalNumMbRec == 0)
-              pCtx->pDec = NULL;
-            return iRet;
-          }
-        }
-
-        if (bReconstructSlice) {
-          if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
-            pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
-            return iRet;
-          }
-        }
-        // for non-I slices the picture only stays complete if LIST_0 is non-empty and all used refs are complete
-        if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
-          if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
-            bAllRefComplete &= CheckRefPicturesComplete (pCtx);
-          } else {
-            bAllRefComplete = false;
-          }
-        }
-      }
-#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "cur_frame : %d\tiCurrIdD : %d\n ",
-               dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFrameNum, iCurrIdD);
-#endif//#if !CODEC_FOR_TESTBED
-      iLastIdD = iCurrIdD;
-      iLastIdQ = iCurrIdQ;
-
-      //pNalUnitsList overflow.
-      ++ iIdx;
-      if (iIdx <= iEndIdx) {
-        pNalCur = pCurAu->pNalUnitsList[iIdx];
-      } else {
-        pNalCur = NULL;
-      }
-
-      // leave the slice loop when the list is exhausted or the next NAL belongs to another dq layer
-      if (pNalCur == NULL ||
-          iLastIdD != pNalCur->sNalHeaderExt.uiDependencyId ||
-          iLastIdQ != pNalCur->sNalHeaderExt.uiQualityId)
-        break;
-    }
-
-    // Set the current dec picture complete flag. The flag will be reset when current picture need do ErrorCon.
-    pCtx->pDec->bIsComplete = bAllRefComplete;
-    if (!pCtx->pDec->bIsComplete) {  // Ref pictures ECed, result in ECed
-      pCtx->iErrorCode |= dsDataErrorConcealed;
-    }
-
-    // A dq layer decoded here
-#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
-#undef fprintf
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "POC: #%d, FRAME: #%d, D: %d, Q: %d, T: %d, P: %d, %d\n",
-             pSh->iPicOrderCntLsb, pSh->iFrameNum, iCurrIdD, iCurrIdQ, dq_cur->sLayerInfo.sNalHeaderExt.uiTemporalId,
-             dq_cur->sLayerInfo.sNalHeaderExt.uiPriorityId, dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iSliceQp);
-#endif//#if !CODEC_FOR_TESTBED
-
-    if (dq_cur->uiLayerDqId == kuiTargetLayerDqId) { // the target layer is done: output and reference handling
-      if (!pCtx->bInstantDecFlag) {
-        if (!pCtx->pParam->bParseOnly) {
-          //Do error concealment here
-          if ((NeedErrorCon (pCtx)) && (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE)) {
-            ImplementErrorCon (pCtx);
-            pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight; // treat every MB as reconstructed after concealment
-            pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
-            pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
-          }
-        }
-      }
-
-      iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
-      if (iRet)
-        return iRet;
-
-      pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
-      if (uiNalRefIdc > 0) { // uiNalRefIdc comes from the last slice NAL handled in the loop above
-        iRet = WelsMarkAsRef (pCtx);
-        if (iRet != ERR_NONE) {
-          if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
-            pCtx->iErrorCode |= dsBitstreamError;
-          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-            pCtx->pDec = NULL;
-            return iRet;
-          }
-        }
-        if (!pCtx->pParam->bParseOnly)
-          ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
-                                    pCtx->pDec->iLinesize,
-                                    pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
-      }
-      pCtx->pDec = NULL; //after frame decoding, always set to NULL
-    }
-
-    // need update frame_num due current frame is well decoded
-    if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
-      pCtx->iPrevFrameNum = pSh->iFrameNum;
-    if (pCtx->bLastHasMmco5) // the previous frame_num restarts from 0 when bLastHasMmco5 is set
-      pCtx->iPrevFrameNum = 0;
-  }
-
-  return ERR_NONE;
-}
-/*
- * CheckAndFinishLastPic
- * Decide whether the NAL described by pCtx->sCurNalHead marks an access unit boundary and, if a picture
- * is partly reconstructed (iTotalNumMbRec != 0) and NeedErrorCon() asks for it, finish that picture:
- * conceal and output it when error concealment is enabled, clear the parser state in parse-only mode,
- * or output it as is otherwise.
- * parameter:
- *  pCtx:       decoder context
- *  ppDst:      forwarded to ConstructAccessUnit() / DecodeFrameConstruction()
- *  pDstInfo:   forwarded to ConstructAccessUnit() / DecodeFrameConstruction()
- * return:
- *  false when DecodeFrameConstruction() fails with error concealment disabled; every other path
- *  returns ERR_NONE (see the review note on the final return)
- */
-bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
-  PAccessUnit pAu = pCtx->pAccessUnitList;
-  bool bAuBoundaryFlag = false;
-  if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) { //VCL data, AU list should have data
-    PNalUnit pCurNal = pAu->pNalUnitsList[pAu->uiEndPos];
-    // boundary only if a picture is in progress and the headers differ from the last saved ones
-    bAuBoundaryFlag = (pCtx->iTotalNumMbRec != 0)
-                      && (CheckAccessUnitBoundaryExt (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, &pCtx->sLastSliceHeader,
-                          &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader));
-  } else { //non VCL
-    // AU delimiter and SEI always mark a boundary; parameter sets only when they overwrite a stored one
-    if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_AU_DELIMITER) {
-      bAuBoundaryFlag = true;
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SEI) {
-      bAuBoundaryFlag = true;
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SPS) {
-      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SPS);
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SUBSET_SPS) {
-      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS);
-    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_PPS) {
-      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_PPS);
-    }
-    if (bAuBoundaryFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { //Construct remaining data first
-      ConstructAccessUnit (pCtx, ppDst, pDstInfo); // return value is ignored here
-    }
-  }
-
-  //Do Error Concealment here
-  if (bAuBoundaryFlag && (pCtx->iTotalNumMbRec != 0) && NeedErrorCon (pCtx)) { //AU ready but frame not completely reconed
-    if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-      ImplementErrorCon (pCtx);
-      pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
-      pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
-      pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
-
-      DecodeFrameConstruction (pCtx, ppDst, pDstInfo); // return value is ignored on this path
-      pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use
-      if (pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
-        MarkECFrameAsRef (pCtx);
-      }
-    } else if (pCtx->pParam->bParseOnly) { //clear parse only internal data status
-      pCtx->pParserBsInfo->iNalNum = 0;
-      pCtx->bFrameFinish = true; //clear frame pending status here!
-    } else {
-      if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) {
-        if ((pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) && (pCtx->sLastNalHdrExt.uiTemporalId == 0))
-          pCtx->iErrorCode |= dsNoParamSets;
-        else
-          pCtx->iErrorCode |= dsBitstreamError;
-        pCtx->pDec = NULL;
-        return false;
-      }
-    }
-    pCtx->pDec = NULL;
-    if (pAu->pNalUnitsList[pAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
-      pCtx->iPrevFrameNum = pCtx->sLastSliceHeader.iFrameNum; //save frame_num
-    if (pCtx->bLastHasMmco5)
-      pCtx->iPrevFrameNum = 0;
-  }
-  return ERR_NONE; // NOTE(review): function is declared bool but returns ERR_NONE; its numeric value is not visible in this chunk — confirm what callers expect
-}
-/*
- * CheckRefPicturesComplete
- * Walk the macroblocks of the current slice and report whether every LIST_0 reference picture they
- * index has bIsComplete set. Only the inter MB types listed in the switch are inspected.
- * parameter:
- *  pCtx:       decoder context; pCurDqLayer, sRefPic, pPps and pFmo are read
- * return:
- *  true - all inspected references are complete; false - at least one is not, or FmoNextMb() returned -1
- */
-bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
-  // Multi Reference, RefIdx may differ
-  bool bAllRefComplete = true;
-  int32_t iRealMbIdx = pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
-  for (int32_t iMbIdx = 0; bAllRefComplete
-       && iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) { // stops at the first incomplete reference
-    // ref index positions 0/2/8/10 presumably address the four 8x8 quadrants of the MB — TODO confirm
-    switch (pCtx->pCurDqLayer->pMbType[iRealMbIdx]) {
-    case MB_TYPE_SKIP:
-    case MB_TYPE_16x16:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      break;
-
-    case MB_TYPE_16x8:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
-      break;
-
-    case MB_TYPE_8x16:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
-      break;
-
-    case MB_TYPE_8x8:
-    case MB_TYPE_8x8_REF0:
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
-      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
-      break;
-
-    default:
-      break;
-    }
-    // NOTE(review): in the non-FMO branch iMbIdx has not been incremented yet, so the first MB is visited
-    // twice and the MB at iFirstMbInSlice + iTotalMbInCurSlice - 1 is never visited — confirm intended
-    iRealMbIdx = (pCtx->pPps->uiNumSliceGroups > 1) ? FmoNextMb (pCtx->pFmo, iRealMbIdx) :
-                 (pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice + iMbIdx);
-    if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb()
-      return false;
-  }
-  return bAllRefComplete;
-}
-} // namespace WelsDec
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *      decoder_core.c: Wels decoder framework core implementation
+ */
+
+#include "decoder_core.h"
+#include "error_code.h"
+#include "memmgr_nal_unit.h"
+#include "au_parser.h"
+#include "decode_slice.h"
+#include "manage_dec_ref.h"
+#include "expand_pic.h"
+#include "decoder.h"
+#include "decode_mb_aux.h"
+#include "memory_align.h"
+#include "error_concealment.h"
+
+namespace WelsDec {
+static inline int32_t DecodeFrameConstruction (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  PDqLayer pCurDq = pCtx->pCurDqLayer;
+  PPicture pPic = pCtx->pDec;
+
+  const int32_t kiWidth = pCurDq->iMbWidth << 4;
+  const int32_t kiHeight = pCurDq->iMbHeight << 4;
+
+  const int32_t kiTotalNumMbInCurLayer = pCurDq->iMbWidth * pCurDq->iMbHeight;
+  bool bFrameCompleteFlag = true;
+
+  if (pPic->bNewSeqBegin) {
+    memcpy (& (pCtx->sFrameCrop), & (pCurDq->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.pSps->sFrameCrop),
+            sizeof (SPosOffset)); //confirmed_safe_unsafe_usage
+#ifdef LONG_TERM_REF
+    pCtx->bParamSetsLostFlag      = false;
+#else
+    pCtx->bReferenceLostAtT0Flag = false; // need initialize it due new seq, 6/4/2010
+#endif //LONG_TERM_REF
+    if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) {
+      pCtx->bPrintFrameErrorTraceFlag = true;
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+               "DecodeFrameConstruction(): will output first frame of new sequence, %d x %d, crop_left:%d, crop_right:%d, crop_top:%d, crop_bottom:%d, ignored error packet:%d.",
+               kiWidth, kiHeight, pCtx->sFrameCrop.iLeftOffset, pCtx->sFrameCrop.iRightOffset, pCtx->sFrameCrop.iTopOffset,
+               pCtx->sFrameCrop.iBottomOffset, pCtx->iIgnoredErrorInfoPacketCount);
+      pCtx->iIgnoredErrorInfoPacketCount = 0;
+    }
+  }
+
+  const int32_t kiActualWidth = kiWidth - (pCtx->sFrameCrop.iLeftOffset + pCtx->sFrameCrop.iRightOffset) * 2;
+  const int32_t kiActualHeight = kiHeight - (pCtx->sFrameCrop.iTopOffset + pCtx->sFrameCrop.iBottomOffset) * 2;
+
+
+  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+    if ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
+        || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight)) {
+      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
+      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
+      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
+    }
+    UpdateDecStatNoFreezingInfo (pCtx);
+  }
+
+  if (pCtx->pParam->bParseOnly) { //should exit for parse only to prevent access NULL pDstInfo
+    PAccessUnit pCurAu = pCtx->pAccessUnitList;
+    if (dsErrorFree == pCtx->iErrorCode) { //correct decoding, add to data buffer
+      SParserBsInfo* pParser = pCtx->pParserBsInfo;
+      SNalUnit* pCurNal = NULL;
+      int32_t iTotalNalLen = 0;
+      int32_t iNalLen = 0;
+      int32_t iNum = 0;
+      while (iNum < pParser->iNalNum) {
+        iTotalNalLen += pParser->pNalLenInByte[iNum++];
+      }
+      uint8_t* pDstBuf = pParser->pDstBuff + iTotalNalLen;
+      int32_t iIdx = pCurAu->uiStartPos;
+      int32_t iEndIdx = pCurAu->uiEndPos;
+      uint8_t* pNalBs = NULL;
+      pParser->uiOutBsTimeStamp = (pCurAu->pNalUnitsList [iIdx]) ? pCurAu->pNalUnitsList [iIdx]->uiTimeStamp : 0;
+      //pParser->iNalNum = 0;
+      pParser->iSpsWidthInPixel = (pCtx->pSps->iMbWidth << 4) - ((pCtx->pSps->sFrameCrop.iLeftOffset +
+                                  pCtx->pSps->sFrameCrop.iRightOffset) << 1);
+      pParser->iSpsHeightInPixel = (pCtx->pSps->iMbHeight << 4) - ((pCtx->pSps->sFrameCrop.iTopOffset +
+                                   pCtx->pSps->sFrameCrop.iBottomOffset) << 1);
+
+      if (pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.bIdrFlag) { //IDR
+        if (pCtx->bFrameFinish) { //add required sps/pps
+          if (pParser->iNalNum > pCtx->iMaxNalNum - 2) { //2 reserved for sps+pps
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+                     "DecodeFrameConstruction(): current NAL num (%d) plus sps & pps exceeds permitted num (%d). Will expand",
+                     pParser->iNalNum, pCtx->iMaxNalNum);
+            WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + 2))
+          }
+          bool bSubSps = (NAL_UNIT_CODED_SLICE_EXT == pCurAu->pNalUnitsList [iIdx]->sNalHeaderExt.sNalUnitHeader.eNalUnitType);
+          SSpsBsInfo* pSpsBs = NULL;
+          SPpsBsInfo* pPpsBs = NULL;
+          int32_t iSpsId = pCtx->pSps->iSpsId;
+          int32_t iPpsId = pCtx->pPps->iPpsId;
+          pCtx->bParamSetsLostFlag = false;
+          //find required sps, pps and write into dst buff
+          pSpsBs = bSubSps ? &pCtx->sSubsetSpsBsInfo [iSpsId] : &pCtx->sSpsBsInfo [iSpsId];
+          pPpsBs = &pCtx->sPpsBsInfo [iPpsId];
+          if (pDstBuf - pParser->pDstBuff + pSpsBs->uiSpsBsLen + pPpsBs->uiPpsBsLen >= MAX_ACCESS_UNIT_CAPACITY) {
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+                     "DecodeFrameConstruction(): sps pps size: (%d %d) too large. Failed to parse. \n", pSpsBs->uiSpsBsLen,
+                     pPpsBs->uiPpsBsLen);
+            pCtx->iErrorCode |= dsOutOfMemory;
+            pCtx->pParserBsInfo->iNalNum = 0;
+            return ERR_INFO_OUT_OF_MEMORY;
+          }
+          memcpy (pDstBuf, pSpsBs->pSpsBsBuf, pSpsBs->uiSpsBsLen);
+          pParser->pNalLenInByte [pParser->iNalNum ++] = pSpsBs->uiSpsBsLen;
+          pDstBuf += pSpsBs->uiSpsBsLen;
+          memcpy (pDstBuf, pPpsBs->pPpsBsBuf, pPpsBs->uiPpsBsLen);
+          pParser->pNalLenInByte [pParser->iNalNum ++] = pPpsBs->uiPpsBsLen;
+          pDstBuf += pPpsBs->uiPpsBsLen;
+          pCtx->bFrameFinish = false;
+        }
+      }
+      //then VCL data re-write
+      if (pParser->iNalNum + iEndIdx - iIdx + 1 > pCtx->iMaxNalNum) { //calculate total NAL num
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+                 "DecodeFrameConstruction(): current NAL num (%d) exceeds permitted num (%d). Will expand",
+                 pParser->iNalNum + iEndIdx - iIdx + 1, pCtx->iMaxNalNum);
+        WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY, ExpandBsLenBuffer (pCtx, pParser->iNalNum + iEndIdx - iIdx + 1))
+      }
+      while (iIdx <= iEndIdx) {
+        pCurNal = pCurAu->pNalUnitsList [iIdx ++];
+        iNalLen = pCurNal->sNalData.sVclNal.iNalLength;
+        pNalBs = pCurNal->sNalData.sVclNal.pNalPos;
+        pParser->pNalLenInByte [pParser->iNalNum ++] = iNalLen;
+        if (pDstBuf - pParser->pDstBuff + iNalLen >= MAX_ACCESS_UNIT_CAPACITY) {
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+                   "DecodeFrameConstruction(): composed output size (%ld) exceeds (%d). Failed to parse. current data pos %d out of %d:, previously accumulated num: %d, total num: %d, previously accumulated len: %d, current len: %d, current buf pos: %p, header buf pos: %p \n",
+                   (long) (pDstBuf - pParser->pDstBuff + iNalLen), MAX_ACCESS_UNIT_CAPACITY, iIdx, iEndIdx, iNum, pParser->iNalNum,
+                   iTotalNalLen, iNalLen, pDstBuf, pParser->pDstBuff);
+          pCtx->iErrorCode |= dsOutOfMemory;
+          pCtx->pParserBsInfo->iNalNum = 0;
+          return ERR_INFO_OUT_OF_MEMORY;
+        }
+
+        memcpy (pDstBuf, pNalBs, iNalLen);
+        pDstBuf += iNalLen;
+      }
+      if (pCtx->iTotalNumMbRec == kiTotalNumMbInCurLayer) { //frame complete
+        pCtx->iTotalNumMbRec = 0;
+        pCtx->bFramePending = false;
+        pCtx->bFrameFinish = true; //finish current frame and mark it
+      } else if (pCtx->iTotalNumMbRec != 0) { //frame incomplete
+        pCtx->bFramePending = true;
+        pCtx->pDec->bIsComplete = false;
+        pCtx->bFrameFinish = false; //current frame not finished
+        pCtx->iErrorCode |= dsFramePending;
+        return ERR_INFO_PARSEONLY_PENDING;
+        //pCtx->pParserBsInfo->iNalNum = 0;
+      }
+    } else { //error
+      pCtx->pParserBsInfo->uiOutBsTimeStamp = 0;
+      pCtx->pParserBsInfo->iNalNum = 0;
+      pCtx->pParserBsInfo->iSpsWidthInPixel = 0;
+      pCtx->pParserBsInfo->iSpsHeightInPixel = 0;
+      return ERR_INFO_PARSEONLY_ERROR;
+    }
+    return ERR_NONE;
+  }
+
+  if (pCtx->iTotalNumMbRec != kiTotalNumMbInCurLayer) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
+             "DecodeFrameConstruction(): iTotalNumMbRec:%d, total_num_mb_sps:%d, cur_layer_mb_width:%d, cur_layer_mb_height:%d ",
+             pCtx->iTotalNumMbRec, kiTotalNumMbInCurLayer, pCurDq->iMbWidth, pCurDq->iMbHeight);
+    bFrameCompleteFlag = false; //return later after output buffer is done
+    if (pCtx->bInstantDecFlag) //no-delay decoding, wait for new slice
+      return ERR_INFO_MB_NUM_INADEQUATE;
+  } else if (pCurDq->sLayerInfo.sNalHeaderExt.bIdrFlag
+             && (pCtx->iErrorCode == dsErrorFree)) { //complete non-ECed IDR frame done
+    pCtx->pDec->bIsComplete = true;
+    pCtx->bFreezeOutput = false;
+  }
+
+  pCtx->iTotalNumMbRec = 0;
+
+  //////output:::normal path
+  pDstInfo->uiOutYuvTimeStamp = pPic->uiTimeStamp;
+  ppDst[0]      = pPic->pData[0];
+  ppDst[1]      = pPic->pData[1];
+  ppDst[2]      = pPic->pData[2];
+
+  pDstInfo->UsrData.sSystemBuffer.iFormat = videoFormatI420;
+
+  pDstInfo->UsrData.sSystemBuffer.iWidth = kiActualWidth;
+  pDstInfo->UsrData.sSystemBuffer.iHeight = kiActualHeight;
+  pDstInfo->UsrData.sSystemBuffer.iStride[0] = pPic->iLinesize[0];
+  pDstInfo->UsrData.sSystemBuffer.iStride[1] = pPic->iLinesize[1];
+  ppDst[0] = ppDst[0] + pCtx->sFrameCrop.iTopOffset * 2 * pPic->iLinesize[0] + pCtx->sFrameCrop.iLeftOffset * 2;
+  ppDst[1] = ppDst[1] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
+  ppDst[2] = ppDst[2] + pCtx->sFrameCrop.iTopOffset  * pPic->iLinesize[1] + pCtx->sFrameCrop.iLeftOffset;
+  pDstInfo->iBufferStatus = 1;
+
+  bool bOutResChange = (pCtx->iLastImgWidthInPixel != pDstInfo->UsrData.sSystemBuffer.iWidth)
+                       || (pCtx->iLastImgHeightInPixel != pDstInfo->UsrData.sSystemBuffer.iHeight);
+  pCtx->iLastImgWidthInPixel = pDstInfo->UsrData.sSystemBuffer.iWidth;
+  pCtx->iLastImgHeightInPixel = pDstInfo->UsrData.sSystemBuffer.iHeight;
+  if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) //no buffer output if EC is disabled and frame incomplete
+    pDstInfo->iBufferStatus = (int32_t) (bFrameCompleteFlag
+                                         && pPic->bIsComplete); // When EC disable, ECed picture not output
+  else if ((pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE
+            || pCtx->pParam->eEcActiveIdc == ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE)
+           && pCtx->iErrorCode && bOutResChange)
+    pCtx->bFreezeOutput = true;
+
+  if (pDstInfo->iBufferStatus == 0) {
+    if (!bFrameCompleteFlag)
+      pCtx->iErrorCode |= dsBitstreamError;
+    return ERR_INFO_MB_NUM_INADEQUATE;
+  }
+  if (pCtx->bFreezeOutput) {
+    pDstInfo->iBufferStatus = 0;
+    if (pPic->bNewSeqBegin) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
+               "DecodeFrameConstruction():New sequence detected, but freezed, correct MBs (%d) out of whole MBs (%d).",
+               kiTotalNumMbInCurLayer - pCtx->iMbEcedNum, kiTotalNumMbInCurLayer);
+    }
+  }
+  pCtx->iMbEcedNum = pPic->iMbEcedNum;
+  pCtx->iMbNum = pPic->iMbNum;
+  pCtx->iMbEcedPropNum = pPic->iMbEcedPropNum;
+  if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+    if (pDstInfo->iBufferStatus && ((pCtx->sDecoderStatistics.uiWidth != (unsigned int) kiActualWidth)
+                                    || (pCtx->sDecoderStatistics.uiHeight != (unsigned int) kiActualHeight))) {
+      pCtx->sDecoderStatistics.uiResolutionChangeTimes++;
+      pCtx->sDecoderStatistics.uiWidth = kiActualWidth;
+      pCtx->sDecoderStatistics.uiHeight = kiActualHeight;
+    }
+    UpdateDecStat (pCtx, pDstInfo->iBufferStatus != 0);
+  }
+  return ERR_NONE;
+}
+
+// Returns true only when the layer DQ id is exactly the target DQ id.
+inline bool    CheckSliceNeedReconstruct (uint8_t uiLayerDqId, uint8_t uiTargetDqId) {
+  const bool kbOnTargetLayer = (uiTargetDqId == uiLayerDqId);
+  return kbOnTargetLayer;
+}
+
+// Returns the smaller of uiTargetDqId and psParam->uiTargetDqLayer;
+// when psParam is NULL the ceiling is 255, i.e. the largest uint8_t value.
+inline uint8_t GetTargetDqId (uint8_t uiTargetDqId,  SDecodingParam* psParam) {
+  uint8_t uiCeiling = (uint8_t)255;
+  if (psParam) {
+    uiCeiling = psParam->uiTargetDqLayer;
+  }
+
+  return WELS_MIN (uiTargetDqId, uiCeiling);
+}
+
+
+inline void    HandleReferenceLostL0 (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
+  if (0 == pCurNal->sNalHeaderExt.uiTemporalId) {
+    pCtx->bReferenceLostAtT0Flag = true;
+  }
+  pCtx->iErrorCode |= dsBitstreamError;
+}
+
+// Flags a lost reference (dsRefLost) on the context; for NALs at temporal id 0 or 1
+// the bReferenceLostAtT0Flag is raised as well.
+inline void    HandleReferenceLost (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
+  switch (pCurNal->sNalHeaderExt.uiTemporalId) {
+  case 0:
+  case 1:
+    pCtx->bReferenceLostAtT0Flag = true;
+    break;
+  default:
+    break;
+  }
+  pCtx->iErrorCode |= dsRefLost;
+}
+
+// Runs WelsTargetSliceConstruction() and hands its result back unchanged;
+// any non-zero result is additionally reported through HandleReferenceLostL0().
+inline int32_t  WelsDecodeConstructSlice (PWelsDecoderContext pCtx, PNalUnit pCurNal) {
+  const int32_t kiStatus = WelsTargetSliceConstruction (pCtx);
+  if (kiStatus == 0)
+    return kiStatus;
+
+  HandleReferenceLostL0 (pCtx, pCurNal);
+  return kiStatus;
+}
+
+int32_t ParsePredWeightedTable (PBitStringAux pBs, PSliceHeader pSh) { // reads the weight/offset table from pBs into pSh->sPredWeightTable; ERR_NONE on success
+  uint32_t uiCode;
+  int32_t iList = 0;
+  int32_t iCode;
+  // WELS_READ_VERIFY / WELS_CHECK_SE_BOTH_ERROR_NOLOG presumably leave the function with an error code on failure (macro bodies are not visible here)
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
+  WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "luma_log2_weight_denom",
+                                  GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_LOG2_WEIGHT_DENOM));
+  pSh->sPredWeightTable.uiLumaLog2WeightDenom = uiCode;
+  if (pSh->pSps->uiChromaArrayType != 0) { // the chroma denominator is only present when the SPS chroma array type is non-zero
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
+    WELS_CHECK_SE_BOTH_ERROR_NOLOG (uiCode, 0, 7, "chroma_log2_weight_denom",
+                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_LOG2_WEIGHT_DENOM));
+    pSh->sPredWeightTable.uiChromaLog2WeightDenom = uiCode;
+  }
+  // NOTE(review): when uiChromaArrayType is 0 the chroma denominator is not assigned above, so the test below reads whatever pSh already held - confirm the caller initialises it
+  if ((pSh->sPredWeightTable.uiLumaLog2WeightDenom | pSh->sPredWeightTable.uiChromaLog2WeightDenom) > 7)
+    return ERR_NONE;
+
+  do { // one pass per reference list; a second pass happens only for B slices (see the loop tail)
+
+    for (int i = 0; i < pSh->uiRefCount[iList]; i++) { // one entry per active reference of the current list
+      //luma
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); // flag bit: explicit luma weight/offset follows when set
+      if (!!uiCode) {
+
+        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_weight",
+                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_WEIGHT));
+        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = iCode;
+
+        WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+        WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "luma_offset",
+                                        GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_LUMA_OFFSET));
+        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = iCode;
+      } else { // flag clear: default weight of 1 << luma denominator and zero offset
+        pSh->sPredWeightTable.sPredList[iList].iLumaWeight[i] = 1 << (pSh->sPredWeightTable.uiLumaLog2WeightDenom);
+        pSh->sPredWeightTable.sPredList[iList].iLumaOffset[i] = 0;
+
+      }
+      //chroma
+      if (pSh->pSps->uiChromaArrayType == 0) // no chroma entries to read or default for this reference
+        continue;
+
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); // flag bit: explicit chroma weights/offsets follow when set
+      if (!!uiCode) {
+        for (int j = 0; j < 2; j++) { // two chroma components per reference
+
+
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_weight",
+                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_WEIGHT));
+          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = iCode;
+
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode));
+          WELS_CHECK_SE_BOTH_ERROR_NOLOG (iCode, -128, 127, "chroma_offset",
+                                          GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CHROMA_OFFSET));
+          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = iCode;
+        }
+      } else { // flag clear: default weight of 1 << chroma denominator and zero offset for both components
+        for (int j = 0; j < 2; j++) {
+
+
+          pSh->sPredWeightTable.sPredList[iList].iChromaWeight[i][j] = 1 << (pSh->sPredWeightTable.uiChromaLog2WeightDenom);
+          pSh->sPredWeightTable.sPredList[iList].iChromaOffset[i][j] = 0;
+        }
+      }
+
+    }
+    ++iList;
+    if (pSh->eSliceType != B_SLICE) { // non-B slices stop after the first list
+      break;
+    }
+  } while (iList < LIST_A);//TODO: SUPPORT LIST_A
+  return ERR_NONE;
+}
+
+/*
+ * CreateImplicitWeightTable
+ * Fills pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] for every pair of
+ * list-0 / list-1 references of the current slice. Acts only when the layer has
+ * bUseWeightedBiPredIdc set and the active PPS has uiWeightedBipredIdc == 2.
+ * Shortcut: when each list holds exactly one reference and the two reference POCs sum up
+ * to twice the current POC, bUseWeightedBiPredIdc is cleared and no table is written.
+ * Pairs with a missing (NULL) reference are left untouched.
+ */
+void CreateImplicitWeightTable (PWelsDecoderContext pCtx) {
+
+  PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  if (pCurDqLayer->bUseWeightedBiPredIdc && pSliceHeader->pPps->uiWeightedBipredIdc == 2) {
+    int32_t iPoc = pSliceHeader->iPicOrderCntLsb;
+
+    // The reference slots may be empty (the loops below already cope with that), so they are
+    // checked before being dereferenced. The POC sum is formed in 64 bits because adding two
+    // arbitrary 32-bit POC values can overflow a signed 32-bit integer.
+    if (pSliceHeader->uiRefCount[0] == 1 && pSliceHeader->uiRefCount[1] == 1
+        && pCtx->sRefPic.pRefList[LIST_0][0] != NULL && pCtx->sRefPic.pRefList[LIST_1][0] != NULL
+        && static_cast<int64_t> (pCtx->sRefPic.pRefList[LIST_0][0]->iFramePoc)
+        + static_cast<int64_t> (pCtx->sRefPic.pRefList[LIST_1][0]->iFramePoc) == 2 * static_cast<int64_t> (iPoc)) {
+      pCurDqLayer->bUseWeightedBiPredIdc = false;
+      return;
+    }
+
+    pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom = 5;
+    pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom = 5;
+    for (int32_t iRef0 = 0; iRef0 < pSliceHeader->uiRefCount[0]; iRef0++) {
+      if (pCtx->sRefPic.pRefList[LIST_0][iRef0]) {
+        const int32_t iPoc0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->iFramePoc;
+        bool bIsLongRef0 = pCtx->sRefPic.pRefList[LIST_0][iRef0]->bIsLongRef;
+        for (int32_t iRef1 = 0; iRef1 < pSliceHeader->uiRefCount[1]; iRef1++) {
+          if (pCtx->sRefPic.pRefList[LIST_1][iRef1]) {
+            const int32_t iPoc1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->iFramePoc;
+            bool bIsLongRef1 = pCtx->sRefPic.pRefList[LIST_1][iRef1]->bIsLongRef;
+            // Default weight of 32; it is kept when either reference is long-term, when the
+            // clipped POC distance is zero, or when the scale factor falls outside [-64, 128].
+            pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 32;
+            if (!bIsLongRef0 && !bIsLongRef1) {
+              const int32_t iTd = WELS_CLIP3 (iPoc1 - iPoc0, -128, 127);
+              if (iTd) {
+                int32_t iTb = WELS_CLIP3 (iPoc - iPoc0, -128, 127);
+                int32_t iTx = (16384 + (WELS_ABS (iTd) >> 1)) / iTd;
+                int32_t iDistScaleFactor = (iTb * iTx + 32) >> 8;
+                if (iDistScaleFactor >= -64 && iDistScaleFactor <= 128) {
+                  pCurDqLayer->pPredWeightTable->iImplicitWeight[iRef0][iRef1] = 64 - iDistScaleFactor;
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return;
+}
+
+/*
+ *  Predeclared function routines ..
+ */
+int32_t ParseRefPicListReordering (PBitStringAux pBs, PSliceHeader pSh) { // reads the list reordering commands from pBs into pSh->pRefPicListReordering; ERR_NONE on success
+  int32_t iList = 0;
+  const EWelsSliceType keSt = pSh->eSliceType;
+  PRefPicListReorderSyn pRefPicListReordering = &pSh->pRefPicListReordering;
+  PSps pSps = pSh->pSps;
+  uint32_t uiCode;
+  if (keSt == I_SLICE || keSt == SI_SLICE) // nothing is read for I / SI slices
+    return ERR_NONE;
+
+  // Common syntax for P and B slices: list0 first, then list1 for B slices only.
+  do {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //ref_pic_list_modification_flag_l0 (the same read serves the second list on the B-slice pass)
+    pRefPicListReordering->bRefPicListReorderingFlag[iList] = !!uiCode;
+
+    if (pRefPicListReordering->bRefPicListReorderingFlag[iList]) {
+      int32_t iIdx = 0;
+      do { // one reordering command per iteration, terminated by idc == 3
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //modification_of_pic_nums_idc
+        const uint32_t kuiIdc = uiCode;
+
+        // Guards against the reference list reordering crash: command index past the table, or idc value > 3.
+        if ((iIdx >= MAX_REF_PIC_COUNT) || (kuiIdc > 3)) {
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
+        }
+        pRefPicListReordering->sReorderingSyn[iList][iIdx].uiReorderingOfPicNumsIdc = kuiIdc;
+        if (kuiIdc == 3) // end-of-commands marker; it is stored before leaving the loop
+          break;
+
+        if (iIdx >= pSh->uiRefCount[iList] || iIdx >= MAX_REF_PIC_COUNT) // more commands than active references is rejected; the MAX_REF_PIC_COUNT part repeats the test above
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING);
+
+        if (kuiIdc == 0 || kuiIdc == 1) {
+          // abs_diff_pic_num_minus1 should be in range 0 to MaxPicNum-1, MaxPicNum is derived as
+          // 2^(4+log2_max_frame_num_minus4)
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //abs_diff_pic_num_minus1
+          WELS_CHECK_SE_UPPER_ERROR_NOLOG (uiCode, (uint32_t) (1 << pSps->uiLog2MaxFrameNum), "abs_diff_pic_num_minus1",
+                                           GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REF_REORDERING));
+          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiAbsDiffPicNumMinus1 = uiCode; // uiAbsDiffPicNumMinus1
+        } else if (kuiIdc == 2) {
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
+          pRefPicListReordering->sReorderingSyn[iList][iIdx].uiLongTermPicNum = uiCode;
+        }
+
+        ++ iIdx;
+      } while (true);
+    }
+    if (keSt != B_SLICE) // only B slices carry a second list
+      break;
+    ++ iList;
+  } while (iList < LIST_A);
+
+  return ERR_NONE;
+}
+
+int32_t ParseDecRefPicMarking (PWelsDecoderContext pCtx, PBitStringAux pBs, PSliceHeader pSh, PSps pSps,
+                               const bool kbIdrFlag) { // reads the reference marking syntax from pBs into pSh->sRefMarking; ERR_NONE on success
+  PRefPicMarking const kpRefMarking = &pSh->sRefMarking;
+  uint32_t uiCode;
+  if (kbIdrFlag) { // IDR: just two one-bit flags
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //no_output_of_prior_pics_flag
+    kpRefMarking->bNoOutputOfPriorPicsFlag = !!uiCode;
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //long_term_reference_flag
+    kpRefMarking->bLongTermRefFlag = !!uiCode;
+  } else {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_ref_pic_marking_mode_flag
+    kpRefMarking->bAdaptiveRefPicMarkingModeFlag = !!uiCode;
+    if (kpRefMarking->bAdaptiveRefPicMarkingModeFlag) {
+      int32_t iIdx = 0;
+      bool bAllowMmco5 = true, bMmco4Exist = false, bMmco5Exist = false, bMmco6Exist = false; // bookkeeping for the once-only / ordering checks below
+      do { // one memory management operation per iteration
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //memory_management_control_operation
+        const uint32_t kuiMmco = uiCode;
+
+        kpRefMarking->sMmcoRef[iIdx].uiMmcoType = kuiMmco;
+        if (kuiMmco == MMCO_END) // terminator; it is stored before leaving the loop
+          break;
+
+        if (kuiMmco == MMCO_SHORT2UNUSED || kuiMmco == MMCO_SHORT2LONG) {
+          bAllowMmco5 = false; // a later MMCO_RESET is rejected after this operation
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //difference_of_pic_nums_minus1
+          kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum = 1 + uiCode;
+          kpRefMarking->sMmcoRef[iIdx].iShortFrameNum = (pSh->iFrameNum - kpRefMarking->sMmcoRef[iIdx].iDiffOfPicNum) & ((
+                1 << pSps->uiLog2MaxFrameNum) - 1); // frame number difference wrapped to uiLog2MaxFrameNum bits
+        } else if (kuiMmco == MMCO_LONG2UNUSED) {
+          bAllowMmco5 = false; // a later MMCO_RESET is rejected after this operation
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_pic_num
+          kpRefMarking->sMmcoRef[iIdx].uiLongTermPicNum = uiCode;
+        }
+        if (kuiMmco == MMCO_SHORT2LONG || kuiMmco == MMCO_LONG) {
+          if (kuiMmco == MMCO_LONG) {
+            WELS_VERIFY_RETURN_IF (-1, bMmco6Exist); // presumably returns -1 when MMCO_LONG shows up a second time (macro body not visible here)
+            bMmco6Exist = true;
+          }
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //long_term_frame_idx
+          kpRefMarking->sMmcoRef[iIdx].iLongTermFrameIdx = uiCode;
+        } else if (kuiMmco == MMCO_SET_MAX_LONG) {
+          WELS_VERIFY_RETURN_IF (-1, bMmco4Exist); // MMCO_SET_MAX_LONG is accepted once only
+          bMmco4Exist = true;
+          WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //max_long_term_frame_idx_plus1
+          kpRefMarking->sMmcoRef[iIdx].iMaxLongTermFrameIdx = -1 + uiCode;
+        } else if (kuiMmco == MMCO_RESET) {
+          WELS_VERIFY_RETURN_IF (-1, (!bAllowMmco5 || bMmco5Exist)); // rejected after a short2unused / short2long / long2unused operation or a previous reset
+          bMmco5Exist = true;
+          // reset the POC state kept on the context; note the write goes through pCtx->pSliceHeader, not pSh
+          pCtx->iPrevPicOrderCntLsb = 0;
+          pCtx->iPrevPicOrderCntMsb = 0;
+          pCtx->pSliceHeader->iPicOrderCntLsb = 0;
+        }
+        ++ iIdx;
+
+      } while (iIdx < MAX_MMCO_COUNT); // NOTE(review): reading MAX_MMCO_COUNT operations without an MMCO_END leaves the loop without reporting an error - confirm this is intended
+    }
+  }
+
+  return ERR_NONE;
+}
+
+// Loads default values into the slice header extension pShExt.
+// Returns false (and writes nothing) when either pointer is NULL, true otherwise.
+bool FillDefaultSliceHeaderExt (PSliceHeaderExt pShExt, PNalUnitHeaderExt pNalExt) {
+  if (NULL == pShExt || NULL == pNalExt)
+    return false;
+
+  // The base pred weight table flag defaults to true only when inter-layer prediction
+  // is not disabled and the quality id is 0.
+  pShExt->bBasePredWeightTableFlag = ! (pNalExt->iNoInterLayerPredFlag || pNalExt->uiQualityId > 0);
+
+  // Inter-layer defaults.
+  pShExt->uiRefLayerDqId                         = (uint8_t) - 1;
+  pShExt->uiDisableInterLayerDeblockingFilterIdc = 0;
+  pShExt->iInterLayerSliceAlphaC0Offset          = 0;
+  pShExt->iInterLayerSliceBetaOffset             = 0;
+  pShExt->bConstrainedIntraResamplingFlag        = false;
+  pShExt->uiRefLayerChromaPhaseXPlus1Flag        = 0;
+  pShExt->uiRefLayerChromaPhaseYPlus1            = 1;
+
+  // Scaled reference layer size: the slice's macroblock dimensions times 16.
+  pShExt->iScaledRefLayerPicWidthInSampleLuma    = pShExt->sSliceHeader.iMbWidth << 4;
+  pShExt->iScaledRefLayerPicHeightInSampleLuma   = pShExt->sSliceHeader.iMbHeight << 4;
+
+  // Every prediction / mode flag starts out cleared.
+  pShExt->bSliceSkipFlag            = false;
+  pShExt->bAdaptiveBaseModeFlag     = false;
+  pShExt->bDefaultBaseModeFlag      = false;
+  pShExt->bAdaptiveMotionPredFlag   = false;
+  pShExt->bDefaultMotionPredFlag    = false;
+  pShExt->bAdaptiveResidualPredFlag = false;
+  pShExt->bDefaultResidualPredFlag  = false;
+  pShExt->bTCoeffLevelPredFlag      = false;
+
+  // Scan index range 0..15.
+  pShExt->uiScanIdxStart = 0;
+  pShExt->uiScanIdxEnd   = 15;
+
+  return true;
+}
+
+// Allocates the raw bitstream buffer of the decoder context and, in parse-only mode,
+// the parser info structure, its output buffer, the saved-data buffer and the NAL length table.
+// Returns ERR_INFO_INVALID_PTR for a NULL context, ERR_INFO_OUT_OF_MEMORY when an
+// allocation fails (earlier allocations are left in place), ERR_NONE otherwise.
+int32_t InitBsBuffer (PWelsDecoderContext pCtx) {
+  if (NULL == pCtx)
+    return ERR_INFO_INVALID_PTR;
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  // Raw input buffer.
+  pCtx->iMaxBsBufferSizeInByte = MIN_ACCESS_UNIT_CAPACITY * MAX_BUFFERED_NUM;
+  uint8_t* pRawHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
+                      "pCtx->sRawData.pHead"));
+  pCtx->sRawData.pHead = pRawHead;
+  if (NULL == pRawHead)
+    return ERR_INFO_OUT_OF_MEMORY;
+  pCtx->sRawData.pCurPos   = pRawHead;
+  pCtx->sRawData.pStartPos = pRawHead;
+  pCtx->sRawData.pEnd      = pRawHead + pCtx->iMaxBsBufferSizeInByte;
+
+  // Everything below is needed in parse-only mode only.
+  if (!pCtx->pParam->bParseOnly)
+    return ERR_NONE;
+
+  // Parser info structure.
+  SParserBsInfo* pParser = static_cast<SParserBsInfo*> (pMa->WelsMallocz (sizeof (SParserBsInfo),
+                           "pCtx->pParserBsInfo"));
+  pCtx->pParserBsInfo = pParser;
+  if (NULL == pParser)
+    return ERR_INFO_OUT_OF_MEMORY;
+  memset (pParser, 0, sizeof (SParserBsInfo));
+
+  // Output buffer of the parser.
+  pParser->pDstBuff = static_cast<uint8_t*> (pMa->WelsMallocz (MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t),
+                      "pCtx->pParserBsInfo->pDstBuff"));
+  if (NULL == pParser->pDstBuff)
+    return ERR_INFO_OUT_OF_MEMORY;
+  memset (pParser->pDstBuff, 0, MAX_ACCESS_UNIT_CAPACITY * sizeof (uint8_t));
+
+  // Saved-data buffer, same size as the raw buffer.
+  uint8_t* pSavedHead = static_cast<uint8_t*> (pMa->WelsMallocz (pCtx->iMaxBsBufferSizeInByte,
+                        "pCtx->sSavedData.pHead"));
+  pCtx->sSavedData.pHead = pSavedHead;
+  if (NULL == pSavedHead)
+    return ERR_INFO_OUT_OF_MEMORY;
+  pCtx->sSavedData.pCurPos   = pSavedHead;
+  pCtx->sSavedData.pStartPos = pSavedHead;
+  pCtx->sSavedData.pEnd      = pSavedHead + pCtx->iMaxBsBufferSizeInByte;
+
+  // NAL length table.
+  pCtx->iMaxNalNum = MAX_NAL_UNITS_IN_LAYER + 2; //2 reserved for SPS+PPS
+  pParser->pNalLenInByte = static_cast<int*> (pMa->WelsMallocz (pCtx->iMaxNalNum * sizeof (int),
+                           "pCtx->pParserBsInfo->pNalLenInByte"));
+  if (NULL == pParser->pNalLenInByte)
+    return ERR_INFO_OUT_OF_MEMORY;
+
+  return ERR_NONE;
+}
+
+int32_t ExpandBsBuffer (PWelsDecoderContext pCtx, const int kiSrcLen) { // moves the raw (and, in parse-only mode, saved) bitstream data into larger buffers; ERR_NONE on success
+  if (pCtx == NULL)
+    return ERR_INFO_INVALID_PTR;
+  int32_t iExpandStepShift = 1; // the current size is at least doubled
+  int32_t iNewBuffLen = WELS_MAX ((kiSrcLen * MAX_BUFFERED_NUM), (pCtx->iMaxBsBufferSizeInByte << iExpandStepShift));
+  //allocate new bs buffer
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  //Realloc sRawData
+  uint8_t* pNewBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sRawData.pHead"));
+  if (pNewBsBuff == NULL) { // nothing has been modified yet at this point
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewBsBuff (%d)", iNewBuffLen);
+    pCtx->iErrorCode |= dsOutOfMemory;
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  //Calculate and set the bs start and end position
+  for (uint32_t i = 0; i <= pCtx->pAccessUnitList->uiActualUnitsNum; i++) { // NOTE(review): the bound is inclusive, so entry [uiActualUnitsNum] is rebased too - confirm that entry always exists
+    PBitStringAux pSliceBitsRead = &pCtx->pAccessUnitList->pNalUnitsList[i]->sNalData.sVclNal.sSliceBitsRead;
+    pSliceBitsRead->pStartBuf = pSliceBitsRead->pStartBuf - pCtx->sRawData.pHead + pNewBsBuff; // same offset, new base
+    pSliceBitsRead->pEndBuf   = pSliceBitsRead->pEndBuf   - pCtx->sRawData.pHead + pNewBsBuff;
+    pSliceBitsRead->pCurBuf   = pSliceBitsRead->pCurBuf   - pCtx->sRawData.pHead + pNewBsBuff;
+  }
+
+  //Copy current buffer status to new buffer
+  memcpy (pNewBsBuff, pCtx->sRawData.pHead, pCtx->iMaxBsBufferSizeInByte); // the old size is copied; iMaxBsBufferSizeInByte is only updated at the very end
+  pCtx->sRawData.pStartPos = pNewBsBuff + (pCtx->sRawData.pStartPos - pCtx->sRawData.pHead);
+  pCtx->sRawData.pCurPos   = pNewBsBuff + (pCtx->sRawData.pCurPos   - pCtx->sRawData.pHead);
+  pCtx->sRawData.pEnd      = pNewBsBuff + iNewBuffLen;
+  pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData.pHead"); // the old buffer is released only after every offset has been taken from it
+  pCtx->sRawData.pHead = pNewBsBuff;
+
+  if (pCtx->pParam->bParseOnly) {
+    //Realloc sSavedData
+    uint8_t* pNewSavedBsBuff = static_cast<uint8_t*> (pMa->WelsMallocz (iNewBuffLen, "pCtx->sSavedData.pHead"));
+    if (pNewSavedBsBuff == NULL) { // NOTE(review): sRawData has already been swapped above while iMaxBsBufferSizeInByte still holds the old size on this exit
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "ExpandBsBuffer() Failed for malloc pNewSavedBsBuff (%d)", iNewBuffLen);
+      pCtx->iErrorCode |= dsOutOfMemory;
+      return ERR_INFO_OUT_OF_MEMORY;
+    }
+
+    //Copy current buffer status to new buffer
+    memcpy (pNewSavedBsBuff, pCtx->sSavedData.pHead, pCtx->iMaxBsBufferSizeInByte);
+    pCtx->sSavedData.pStartPos = pNewSavedBsBuff + (pCtx->sSavedData.pStartPos - pCtx->sSavedData.pHead);
+    pCtx->sSavedData.pCurPos   = pNewSavedBsBuff + (pCtx->sSavedData.pCurPos   - pCtx->sSavedData.pHead);
+    pCtx->sSavedData.pEnd      = pNewSavedBsBuff + iNewBuffLen;
+    pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData.pHead");
+    pCtx->sSavedData.pHead = pNewSavedBsBuff;
+  }
+
+  pCtx->iMaxBsBufferSizeInByte = iNewBuffLen; // record the new capacity once both buffers have been replaced
+  return ERR_NONE;
+}
+
+/*
+ * ExpandBsLenBuffer
+ * Grows the parse-only NAL length table (pCtx->pParserBsInfo->pNalLenInByte) to twice
+ * kiCurrLen entries, capped at MAX_MB_SIZE + 2, keeping the entries already stored.
+ * Parameter:
+ *  pCtx:      decoder context owning the table
+ *  kiCurrLen: number of entries the caller needs to store
+ * return:
+ *  ERR_NONE - table is large enough on return (grown, or already sufficient);
+ *  ERR_INFO_INVALID_PTR - pCtx or its parser info is NULL;
+ *  ERR_INFO_INVALID_ACCESS - the table was never allocated;
+ *  ERR_INFO_OUT_OF_MEMORY - kiCurrLen reaches the cap or the allocation failed
+ *  (pCtx->iErrorCode gets dsOutOfMemory in both cases).
+ */
+int32_t ExpandBsLenBuffer (PWelsDecoderContext pCtx, const int kiCurrLen) {
+  if (pCtx == NULL || pCtx->pParserBsInfo == NULL)
+    return ERR_INFO_INVALID_PTR;
+
+  SParserBsInfo* pParser = pCtx->pParserBsInfo;
+  if (!pParser->pNalLenInByte)
+    return ERR_INFO_INVALID_ACCESS;
+
+  if (kiCurrLen >= MAX_MB_SIZE + 2) { //exceeds the max MB number of level 5.2
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Current nal num (%d) exceededs %d.", kiCurrLen, MAX_MB_SIZE);
+    pCtx->iErrorCode |= dsOutOfMemory;
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  // Double the request, capped at the maximum; zero/negative requests need no room at all.
+  const int iNewLen = (kiCurrLen > 0) ? WELS_MIN (kiCurrLen * 2, MAX_MB_SIZE + 2) : 0;
+
+  // The copy below moves iMaxNalNum entries, so the new table must never be smaller than the
+  // current one. If the current table already covers the new length it also covers kiCurrLen,
+  // and there is nothing to do.
+  if (iNewLen <= pCtx->iMaxNalNum)
+    return ERR_NONE;
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+  int* pNewLenBuffer = static_cast<int*> (pMa->WelsMallocz (iNewLen * sizeof (int),
+                                          "pCtx->pParserBsInfo->pNalLenInByte"));
+  if (pNewLenBuffer == NULL) {
+    pCtx->iErrorCode |= dsOutOfMemory;
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  //copy existing data from old length buffer to new
+  memcpy (pNewLenBuffer, pParser->pNalLenInByte, pCtx->iMaxNalNum * sizeof (int));
+  pMa->WelsFree (pParser->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
+  pParser->pNalLenInByte = pNewLenBuffer;
+  pCtx->iMaxNalNum = iNewLen;
+  return ERR_NONE;
+}
+
+// Validates an incoming data length against the bitstream buffer.
+// Lengths above MAX_ACCESS_UNIT_CAPACITY are rejected with ERR_INFO_INVALID_ACCESS (dsBitstreamError is set);
+// lengths above 1/MAX_BUFFERED_NUM of the current buffer trigger ExpandBsBuffer(), whose failure
+// is reported as ERR_INFO_OUT_OF_MEMORY. Returns ERR_NONE otherwise.
+int32_t CheckBsBuffer (PWelsDecoderContext pCtx, const int32_t kiSrcLen) {
+  // Hard limit: more than the maximum allowed data.
+  if (kiSrcLen > MAX_ACCESS_UNIT_CAPACITY) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "Max AU size exceeded. Allowed size = %d, current size = %d",
+             MAX_ACCESS_UNIT_CAPACITY,
+             kiSrcLen);
+    pCtx->iErrorCode |= dsBitstreamError;
+    return ERR_INFO_INVALID_ACCESS;
+  }
+
+  // Soft limit: the data could overwrite the buffer, so enlarge the buffer first.
+  const bool kbMustGrow = (kiSrcLen > pCtx->iMaxBsBufferSizeInByte / MAX_BUFFERED_NUM);
+  if (kbMustGrow && ExpandBsBuffer (pCtx, kiSrcLen) != 0)
+    return ERR_INFO_OUT_OF_MEMORY;
+
+  return ERR_NONE;
+}
+
+/*
+ * WelsInitStaticMemory
+ * Sets up the access unit NAL list (MemInitNalList) and the bitstream buffers (InitBsBuffer)
+ * of the decoder context, then resets uiTargetDqId to (uint8_t)-1 and clears bEndOfStreamFlag.
+ * return:
+ *  ERR_NONE - success;
+ *  ERR_INFO_INVALID_PTR - pCtx is NULL;
+ *  ERR_INFO_OUT_OF_MEMORY - either set-up step failed.
+*/
+int32_t WelsInitStaticMemory (PWelsDecoderContext pCtx) {
+  if (NULL == pCtx)
+    return ERR_INFO_INVALID_PTR;
+
+  // The bitstream buffers are only attempted once the NAL list is in place.
+  const bool kbNalListReady = (0 == MemInitNalList (&pCtx->pAccessUnitList, MAX_NAL_UNIT_NUM_IN_AU, pCtx->pMemAlign));
+  if (!kbNalListReady || 0 != InitBsBuffer (pCtx))
+    return ERR_INFO_OUT_OF_MEMORY;
+
+  pCtx->bEndOfStreamFlag = false;
+  pCtx->uiTargetDqId     = (uint8_t) - 1;
+  return ERR_NONE;
+}
+
+/*
+ * WelsFreeStaticMemory
+ * Free memory introduced in WelsInitStaticMemory at destruction of decoder.
+ * Releases the access unit NAL list, the raw bitstream buffer, the parse-only buffers
+ * (saved data, NAL length table, parser output buffer and parser info, when
+ * pParam->bParseOnly is set) and finally pParam itself. Freed pointers are reset to NULL.
+ * A NULL pCtx is ignored; a NULL pCtx->pParam is treated as "not parse-only".
+ */
+void WelsFreeStaticMemory (PWelsDecoderContext pCtx) {
+  if (pCtx == NULL)
+    return;
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  MemFreeNalList (&pCtx->pAccessUnitList, pMa);
+
+  if (pCtx->sRawData.pHead) {
+    pMa->WelsFree (pCtx->sRawData.pHead, "pCtx->sRawData->pHead");
+  }
+  pCtx->sRawData.pHead                = NULL;
+  pCtx->sRawData.pEnd                 = NULL;
+  pCtx->sRawData.pStartPos            = NULL;
+  pCtx->sRawData.pCurPos              = NULL;
+  // pParam is checked before use: the block further down already allows it to be NULL,
+  // so it must not be dereferenced unconditionally here.
+  if (NULL != pCtx->pParam && pCtx->pParam->bParseOnly) {
+    if (pCtx->sSavedData.pHead) {
+      pMa->WelsFree (pCtx->sSavedData.pHead, "pCtx->sSavedData->pHead");
+    }
+    pCtx->sSavedData.pHead                = NULL;
+    pCtx->sSavedData.pEnd                 = NULL;
+    pCtx->sSavedData.pStartPos            = NULL;
+    pCtx->sSavedData.pCurPos              = NULL;
+    if (pCtx->pParserBsInfo) {
+      // Members first, then the structure that owns them.
+      if (pCtx->pParserBsInfo->pNalLenInByte) {
+        pMa->WelsFree (pCtx->pParserBsInfo->pNalLenInByte, "pCtx->pParserBsInfo->pNalLenInByte");
+        pCtx->pParserBsInfo->pNalLenInByte = NULL;
+        pCtx->iMaxNalNum = 0;
+      }
+      if (pCtx->pParserBsInfo->pDstBuff) {
+        pMa->WelsFree (pCtx->pParserBsInfo->pDstBuff, "pCtx->pParserBsInfo->pDstBuff");
+        pCtx->pParserBsInfo->pDstBuff = NULL;
+      }
+      pMa->WelsFree (pCtx->pParserBsInfo, "pCtx->pParserBsInfo");
+      pCtx->pParserBsInfo = NULL;
+    }
+  }
+
+  // pParam goes last because bParseOnly had to be read from it above.
+  if (NULL != pCtx->pParam) {
+    pMa->WelsFree (pCtx->pParam, "pCtx->pParam");
+
+    pCtx->pParam = NULL;
+  }
+}
+/*
+ *  DecodeNalHeaderExt
+ *  Trigger condition: NAL_UNIT_TYPE = NAL_UNIT_PREFIX or NAL_UNIT_CODED_SLICE_EXT
+ *  Unpacks the three bytes at pSrc into pNal->sNalHeaderExt and derives uiLayerDqId
+ *  from the stored dependency id and quality id.
+ *  Parameter:
+ *  pNal:   target NALUnit ptr
+ *  pSrc:   NAL Unit bitstream (three bytes are read)
+ */
+void DecodeNalHeaderExt (PNalUnit pNal, uint8_t* pSrc) {
+  PNalUnitHeaderExt pHeaderExt = &pNal->sNalHeaderExt;
+
+  // Byte 0: bit 6 = IDR flag, bits 5..0 = priority id.
+  const uint8_t kuiByte0 = pSrc[0];
+  pHeaderExt->bIdrFlag              = (kuiByte0 & 0x40) != 0;
+  pHeaderExt->uiPriorityId          = kuiByte0 & 0x3F;
+
+  // Byte 1: bit 7 = no-inter-layer-pred flag, bits 6..4 = dependency id, bits 3..0 = quality id.
+  const uint8_t kuiByte1 = pSrc[1];
+  pHeaderExt->iNoInterLayerPredFlag = kuiByte1 >> 7;
+  pHeaderExt->uiDependencyId        = (kuiByte1 >> 4) & 0x07;
+  pHeaderExt->uiQualityId           = kuiByte1 & 0x0F;
+
+  // Byte 2: bits 7..5 = temporal id, then one flag per bit, bits 1..0 = reserved.
+  const uint8_t kuiByte2 = pSrc[2];
+  pHeaderExt->uiTemporalId          = kuiByte2 >> 5;
+  pHeaderExt->bUseRefBasePicFlag    = (kuiByte2 & 0x10) != 0;
+  pHeaderExt->bDiscardableFlag      = (kuiByte2 & 0x08) != 0;
+  pHeaderExt->bOutputFlag           = (kuiByte2 & 0x04) != 0;
+  pHeaderExt->uiReservedThree2Bits  = kuiByte2 & 0x03;
+
+  // DQ id packs the dependency id above the 4-bit quality id.
+  pHeaderExt->uiLayerDqId           = (pHeaderExt->uiDependencyId << 4) | pHeaderExt->uiQualityId;
+}
+
+
+void UpdateDecoderStatisticsForActiveParaset (SDecoderStatistics* pDecoderStatistics,
+    PSps pSps, PPps pPps) {
+  // Record the ids of the given SPS/PPS, plus the SPS profile and level, in the statistics record.
+  pDecoderStatistics->uiProfile           = (unsigned int) pSps->uiProfileIdc;
+  pDecoderStatistics->uiLevel             = pSps->uiLevelIdc;
+  pDecoderStatistics->iCurrentActiveSpsId = pSps->iSpsId;
+  pDecoderStatistics->iCurrentActivePpsId = pPps->iPpsId;
+}
+
+#define SLICE_HEADER_IDR_PIC_ID_MAX 65535 // idr_pic_id upper bound (range 0..65535)
+#define SLICE_HEADER_REDUNDANT_PIC_CNT_MAX 127 // redundant_pic_cnt upper bound (range 0..127)
+#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN (-12) // lower bound of the parsed *_offset_div2 value times 2
+#define SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX (12) // upper bound of the parsed *_offset_div2 value times 2
+#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN (-12) // same bound for the inter-layer offsets
+#define SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX (12) // same bound for the inter-layer offsets
+#define MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1 15 // upper bound checked for num_ref_idx_l0_active_minus1
+#define MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1 15 // upper bound checked for num_ref_idx_l1_active_minus1
+#define SLICE_HEADER_CABAC_INIT_IDC_MAX 2 // upper bound checked for cabac_init_idc
+/*
+ *  ParseSliceHeaderSyntaxs
+ *  Parse the slice header (plus the slice header extension when kbExtensionFlag is set) from pBs into the
+ *  last available NAL unit of pCtx->pAccessUnitList; returns ERR_NONE, or an error code on bad/unsupported syntax.
+ */
+int32_t ParseSliceHeaderSyntaxs (PWelsDecoderContext pCtx, PBitStringAux pBs, const bool kbExtensionFlag) {
+  PNalUnit const kpCurNal               = (pCtx->pAccessUnitList->uiAvailUnitsNum > 0) ? // never index with 0 - 1
+                                          pCtx->pAccessUnitList->pNalUnitsList[pCtx->pAccessUnitList->uiAvailUnitsNum - 1] : NULL;
+  PNalUnitHeaderExt pNalHeaderExt       = NULL;
+  PSliceHeader pSliceHead               = NULL;
+  PSliceHeaderExt pSliceHeadExt         = NULL;
+  PSubsetSps pSubsetSps                 = NULL;
+  PSps pSps                             = NULL;
+  PPps pPps                             = NULL;
+  EWelsNalUnitType eNalType             = static_cast<EWelsNalUnitType> (0);
+  int32_t iPpsId                        = 0;
+  int32_t iRet                          = ERR_NONE;
+  uint8_t uiSliceType                   = 0;
+  uint8_t uiQualityId                   = BASE_QUALITY_ID; // NOTE(review): never reassigned in this function
+  bool  bIdrFlag                        = false;
+  bool  bSgChangeCycleInvolved          = false;        // involved slice group change cycle ?
+  uint32_t uiCode;
+  int32_t iCode;
+  SLogContext* pLogCtx = & (pCtx->sLogCtx);
+
+  if (kpCurNal == NULL) {
+    return ERR_INFO_OUT_OF_MEMORY;
+  }
+
+  pNalHeaderExt = &kpCurNal->sNalHeaderExt;
+  pSliceHead    = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+  eNalType      = pNalHeaderExt->sNalUnitHeader.eNalUnitType;
+  pSliceHeadExt = &kpCurNal->sNalData.sVclNal.sSliceHeaderExt;
+  if (pSliceHeadExt) { // clear the extension, preserving bStoreRefBasePicFlag and sRefBasePicMarking
+    SRefBasePicMarking sBaseMarking;
+    const bool kbStoreRefBaseFlag = pSliceHeadExt->bStoreRefBasePicFlag;
+    memcpy (&sBaseMarking, &pSliceHeadExt->sRefBasePicMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
+    memset (pSliceHeadExt, 0, sizeof (SSliceHeaderExt));
+    pSliceHeadExt->bStoreRefBasePicFlag = kbStoreRefBaseFlag;
+    memcpy (&pSliceHeadExt->sRefBasePicMarking, &sBaseMarking, sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
+  }
+
+  kpCurNal->sNalData.sVclNal.bSliceHeaderExtFlag = kbExtensionFlag;
+
+  // first_mb_in_slice
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //first_mb_in_slice
+  WELS_CHECK_SE_UPPER_ERROR (uiCode, 36863u, "first_mb_in_slice", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                             ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
+  pSliceHead->iFirstMbInSlice = uiCode;
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //slice_type
+  uiSliceType = uiCode;
+  if (uiSliceType > 9) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "slice type too large (%d) at first_mb(%d)", uiSliceType,
+             pSliceHead->iFirstMbInSlice);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+  }
+  if (uiSliceType > 4)
+    uiSliceType -= 5;
+
+  if ((NAL_UNIT_CODED_SLICE_IDR == eNalType) && (I_SLICE != uiSliceType)) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d) in IDR picture. ", uiSliceType);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+  }
+
+  if (kbExtensionFlag) {
+    if (uiSliceType > 2) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid slice type(%d).", uiSliceType);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+    }
+  }
+
+  pSliceHead->eSliceType = static_cast <EWelsSliceType> (uiSliceType);
+
+  WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //pic_parameter_set_id
+  WELS_CHECK_SE_UPPER_ERROR (uiCode, (MAX_PPS_COUNT - 1), "iPpsId out of range",
+                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                 ERR_INFO_PPS_ID_OVERFLOW));
+  iPpsId = uiCode;
+
+  //add check PPS available here
+  if (pCtx->bPpsAvailFlags[iPpsId] == false) {
+    pCtx->sDecoderStatistics.iPpsReportErrorNum++;
+    if (pCtx->iPPSLastInvalidId != iPpsId) {
+      WelsLog (pLogCtx, WELS_LOG_ERROR, "PPS id (%d) is invalid, previous id (%d) error ignored (%d)!", iPpsId,
+               pCtx->iPPSLastInvalidId, pCtx->iPPSInvalidNum);
+      pCtx->iPPSLastInvalidId = iPpsId;
+      pCtx->iPPSInvalidNum = 0;
+    } else {
+      pCtx->iPPSInvalidNum++;
+    }
+    pCtx->iErrorCode |= dsNoParamSets;
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_PPS_ID);
+  }
+  pCtx->iPPSLastInvalidId = -1;
+
+  pPps    = &pCtx->sPpsBuffer[iPpsId];
+
+  if (pPps->uiNumSliceGroups == 0) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "Invalid PPS referenced");
+    pCtx->iErrorCode |= dsNoParamSets;
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
+  }
+
+  if (kbExtensionFlag) {
+    pSubsetSps      = &pCtx->sSubsetSpsBuffer[pPps->iSpsId];
+    pSps            = &pSubsetSps->sSps;
+    if (pCtx->bSubspsAvailFlags[pPps->iSpsId] == false) {
+      pCtx->sDecoderStatistics.iSubSpsReportErrorNum++;
+      if (pCtx->iSubSPSLastInvalidId != pPps->iSpsId) {
+        WelsLog (pLogCtx, WELS_LOG_ERROR, "Sub SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
+                 pCtx->iSubSPSLastInvalidId, pCtx->iSubSPSInvalidNum);
+        pCtx->iSubSPSLastInvalidId = pPps->iSpsId;
+        pCtx->iSubSPSInvalidNum = 0;
+      } else {
+        pCtx->iSubSPSInvalidNum++;
+      }
+      pCtx->iErrorCode |= dsNoParamSets;
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
+    }
+    pCtx->iSubSPSLastInvalidId = -1;
+  } else {
+    if (pCtx->bSpsAvailFlags[pPps->iSpsId] == false) {
+      pCtx->sDecoderStatistics.iSpsReportErrorNum++;
+      if (pCtx->iSPSLastInvalidId != pPps->iSpsId) {
+        WelsLog (pLogCtx, WELS_LOG_ERROR, "SPS id (%d) is invalid, previous id (%d) error ignored (%d)!", pPps->iSpsId,
+                 pCtx->iSPSLastInvalidId, pCtx->iSPSInvalidNum);
+        pCtx->iSPSLastInvalidId = pPps->iSpsId;
+        pCtx->iSPSInvalidNum = 0;
+      } else {
+        pCtx->iSPSInvalidNum++;
+      }
+      pCtx->iErrorCode |= dsNoParamSets;
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SPS_ID);
+    }
+    pCtx->iSPSLastInvalidId = -1;
+    pSps = &pCtx->sSpsBuffer[pPps->iSpsId];
+  }
+  pSliceHead->iPpsId = iPpsId;
+  pSliceHead->iSpsId = pPps->iSpsId;
+  pSliceHead->pPps   = pPps;
+  pSliceHead->pSps   = pSps;
+  pSliceHeadExt->pSubsetSps = pSubsetSps;
+
+  if (pSps->iNumRefFrames == 0) {
+    if ((uiSliceType != I_SLICE) && (uiSliceType != SI_SLICE)) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "slice_type (%d) not supported for num_ref_frames = 0.", uiSliceType);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_TYPE);
+    }
+  }
+
+  bIdrFlag = (!kbExtensionFlag && eNalType == NAL_UNIT_CODED_SLICE_IDR) || (kbExtensionFlag && pNalHeaderExt->bIdrFlag);
+  pSliceHead->bIdrFlag = bIdrFlag;
+
+  if (pSps->uiLog2MaxFrameNum == 0) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "non existing SPS referenced");
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_NO_PARAM_SETS);
+  }
+  // check first_mb_in_slice
+  WELS_CHECK_SE_UPPER_ERROR ((uint32_t) (pSliceHead->iFirstMbInSlice), (pSps->uiTotalMbCount - 1), "first_mb_in_slice",
+                             GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FIRST_MB_IN_SLICE));
+  WELS_READ_VERIFY (BsGetBits (pBs, pSps->uiLog2MaxFrameNum, &uiCode)); //frame_num
+  pSliceHead->iFrameNum = uiCode;
+
+  pSliceHead->bFieldPicFlag    = false;
+  pSliceHead->bBottomFiledFlag = false;
+  if (!pSps->bFrameMbsOnlyFlag) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "ParseSliceHeaderSyntaxs(): frame_mbs_only_flag = %d not supported. ",
+             pSps->bFrameMbsOnlyFlag);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MBAFF);
+  }
+  pSliceHead->iMbWidth  = pSps->iMbWidth;
+  pSliceHead->iMbHeight = pSps->iMbHeight / (1 + pSliceHead->bFieldPicFlag);
+
+  if (bIdrFlag) {
+    if (pSliceHead->iFrameNum != 0) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING,
+               "ParseSliceHeaderSyntaxs(), invaild frame number: %d due to IDR frame introduced!",
+               pSliceHead->iFrameNum);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_FRAME_NUM);
+    }
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //idr_pic_id
+    // standard 7.4.3 idr_pic_id should be in range 0 to 65535, inclusive.
+    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_IDR_PIC_ID_MAX, "idr_pic_id", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                               ERR_INFO_INVALID_IDR_PIC_ID));
+    pSliceHead->uiIdrPicId = uiCode; /* uiIdrPicId */
+#ifdef LONG_TERM_REF
+    pCtx->uiCurIdrPicId = pSliceHead->uiIdrPicId;
+#endif
+  }
+
+  pSliceHead->iDeltaPicOrderCntBottom = 0;
+  pSliceHead->iDeltaPicOrderCnt[0] =
+    pSliceHead->iDeltaPicOrderCnt[1] = 0;
+  if (pSps->uiPocType == 0) {
+    WELS_READ_VERIFY (BsGetBits (pBs, pSps->iLog2MaxPocLsb, &uiCode)); //pic_order_cnt_lsb
+    const int32_t iMaxPocLsb = 1 << (pSps->iLog2MaxPocLsb);
+    pSliceHead->iPicOrderCntLsb = uiCode;
+    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt_bottom
+      pSliceHead->iDeltaPicOrderCntBottom = iCode;
+    }
+    //Calculate poc if necessary
+    int32_t pocLsb = pSliceHead->iPicOrderCntLsb;
+    if (pSliceHead->bIdrFlag || kpCurNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR) {
+      pCtx->iPrevPicOrderCntMsb = 0;
+      pCtx->iPrevPicOrderCntLsb = 0;
+    }
+    int32_t pocMsb;
+    if (pocLsb < pCtx->iPrevPicOrderCntLsb && pCtx->iPrevPicOrderCntLsb - pocLsb >= iMaxPocLsb / 2)
+      pocMsb = pCtx->iPrevPicOrderCntMsb + iMaxPocLsb;
+    else if (pocLsb > pCtx->iPrevPicOrderCntLsb && pocLsb - pCtx->iPrevPicOrderCntLsb > iMaxPocLsb / 2)
+      pocMsb = pCtx->iPrevPicOrderCntMsb - iMaxPocLsb;
+    else
+      pocMsb = pCtx->iPrevPicOrderCntMsb;
+    pSliceHead->iPicOrderCntLsb = pocMsb + pocLsb;
+
+    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
+      pSliceHead->iPicOrderCntLsb += pSliceHead->iDeltaPicOrderCntBottom;
+    }
+
+    if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
+      pCtx->iPrevPicOrderCntLsb = pocLsb;
+      pCtx->iPrevPicOrderCntMsb = pocMsb;
+    }
+    //End of Calculating poc
+  } else if (pSps->uiPocType == 1 && !pSps->bDeltaPicOrderAlwaysZeroFlag) {
+    WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 0 ]
+    pSliceHead->iDeltaPicOrderCnt[0] = iCode;
+    if (pPps->bPicOrderPresentFlag && !pSliceHead->bFieldPicFlag) {
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //delta_pic_order_cnt[ 1 ]
+      pSliceHead->iDeltaPicOrderCnt[1] = iCode;
+    }
+  }
+  pSliceHead->iRedundantPicCnt = 0;
+  if (pPps->bRedundantPicCntPresentFlag) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //redundant_pic_cnt
+    // standard section 7.4.3, redundant_pic_cnt should be in range 0 to 127, inclusive.
+    WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_REDUNDANT_PIC_CNT_MAX, "redundant_pic_cnt",
+                               GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT));
+    pSliceHead->iRedundantPicCnt = uiCode;
+    if (pSliceHead->iRedundantPicCnt > 0) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "Redundant picture not supported!");
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_REDUNDANT_PIC_CNT);
+    }
+  }
+
+  if (B_SLICE == uiSliceType) {
+    //fix me: this flag still needs to be used somewhere for B-Slice
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //direct_spatial_mv_pred_flag
+    pSliceHead->iDirectSpatialMvPredFlag = uiCode;
+  }
+
+  //set defaults, might be overridden a few lines later
+  pSliceHead->uiRefCount[0] = pPps->uiNumRefIdxL0Active;
+  pSliceHead->uiRefCount[1] = pPps->uiNumRefIdxL1Active;
+
+  bool bReadNumRefFlag = (P_SLICE == uiSliceType || B_SLICE == uiSliceType);
+  if (kbExtensionFlag) {
+    bReadNumRefFlag &= (BASE_QUALITY_ID == pNalHeaderExt->uiQualityId);
+  }
+  if (bReadNumRefFlag) {
+    WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //num_ref_idx_active_override_flag
+    pSliceHead->bNumRefIdxActiveOverrideFlag = !!uiCode;
+    if (pSliceHead->bNumRefIdxActiveOverrideFlag) {
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l0_active_minus1
+      WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L0_ACTIVE_MINUS1, "num_ref_idx_l0_active_minus1",
+                                 GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L0_ACTIVE_MINUS1));
+      pSliceHead->uiRefCount[0] = 1 + uiCode;
+      if (B_SLICE == uiSliceType) {
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //num_ref_idx_l1_active_minus1
+        WELS_CHECK_SE_UPPER_ERROR (uiCode, MAX_NUM_REF_IDX_L1_ACTIVE_MINUS1, "num_ref_idx_l1_active_minus1",
+                                   GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_NUM_REF_IDX_L1_ACTIVE_MINUS1));
+        pSliceHead->uiRefCount[1] = 1 + uiCode;
+      }
+    }
+  }
+
+  if (pSliceHead->uiRefCount[0] > MAX_REF_PIC_COUNT || pSliceHead->uiRefCount[1] > MAX_REF_PIC_COUNT) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "reference overflow");
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REF_COUNT_OVERFLOW);
+  }
+
+  if (BASE_QUALITY_ID == uiQualityId) {
+    iRet = ParseRefPicListReordering (pBs, pSliceHead);
+    if (iRet != ERR_NONE) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid ref pPic list reordering syntaxs!");
+      return iRet;
+    }
+
+    if ((pPps->bWeightedPredFlag && uiSliceType == P_SLICE) || (pPps->uiWeightedBipredIdc == 1 && uiSliceType == B_SLICE)) {
+      iRet = ParsePredWeightedTable (pBs, pSliceHead);
+      if (iRet != ERR_NONE) {
+        WelsLog (pLogCtx, WELS_LOG_WARNING, "invalid weighted prediction syntaxs!");
+        return iRet;
+      }
+    }
+
+    if (kbExtensionFlag) {
+      if (pNalHeaderExt->iNoInterLayerPredFlag || pNalHeaderExt->uiQualityId > 0)
+        pSliceHeadExt->bBasePredWeightTableFlag = false;
+      else
+        pSliceHeadExt->bBasePredWeightTableFlag = true;
+    }
+
+    if (kpCurNal->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc != 0) {
+      iRet = ParseDecRefPicMarking (pCtx, pBs, pSliceHead, pSps, bIdrFlag);
+      if (iRet != ERR_NONE) {
+        return iRet;
+      }
+
+      if (kbExtensionFlag && !pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //store_ref_base_pic_flag
+        pSliceHeadExt->bStoreRefBasePicFlag = !!uiCode;
+        if ((pNalHeaderExt->bUseRefBasePicFlag || pSliceHeadExt->bStoreRefBasePicFlag) && !bIdrFlag) {
+          WelsLog (pLogCtx, WELS_LOG_WARNING,
+                   "ParseSliceHeaderSyntaxs(): bUseRefBasePicFlag or bStoreRefBasePicFlag = 1 not supported.");
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_ILP);
+        }
+      }
+    }
+  }
+
+  if (pPps->bEntropyCodingModeFlag) {
+    if (pSliceHead->eSliceType != I_SLICE && pSliceHead->eSliceType != SI_SLICE) {
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode));
+      WELS_CHECK_SE_UPPER_ERROR (uiCode, SLICE_HEADER_CABAC_INIT_IDC_MAX, "cabac_init_idc", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_CABAC_INIT_IDC));
+      pSliceHead->iCabacInitIdc = uiCode;
+    } else
+      pSliceHead->iCabacInitIdc = 0;
+  }
+
+  WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_qp_delta
+  pSliceHead->iSliceQpDelta     = iCode;
+  pSliceHead->iSliceQp          = pPps->iPicInitQp + pSliceHead->iSliceQpDelta;
+  if (pSliceHead->iSliceQp < 0 || pSliceHead->iSliceQp > 51) {
+    WelsLog (pLogCtx, WELS_LOG_WARNING, "QP %d out of range", pSliceHead->iSliceQp);
+    return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_QP);
+  }
+
+  //FIXME qscale / qp ... stuff
+  if (!kbExtensionFlag) {
+    if (uiSliceType == SP_SLICE || uiSliceType == SI_SLICE) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "SP/SI not supported");
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SPSI);
+    }
+  }
+
+  pSliceHead->uiDisableDeblockingFilterIdc = 0;
+  pSliceHead->iSliceAlphaC0Offset          = 0;
+  pSliceHead->iSliceBetaOffset             = 0;
+  if (pPps->bDeblockingFilterControlPresentFlag) {
+    WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_deblocking_filter_idc
+    pSliceHead->uiDisableDeblockingFilterIdc = uiCode;
+    //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
+    if (pSliceHead->uiDisableDeblockingFilterIdc > 6) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "disable_deblock_filter_idc (%d) out of range [0, 6]",
+               pSliceHead->uiDisableDeblockingFilterIdc);
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
+    }
+    if (pSliceHead->uiDisableDeblockingFilterIdc != 1) {
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_alpha_c0_offset_div2
+      pSliceHead->iSliceAlphaC0Offset = iCode * 2;
+      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceAlphaC0Offset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
+                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                    ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
+      WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //slice_beta_offset_div2
+      pSliceHead->iSliceBetaOffset = iCode * 2;
+      WELS_CHECK_SE_BOTH_ERROR (pSliceHead->iSliceBetaOffset, SLICE_HEADER_ALPHAC0_BETA_OFFSET_MIN,
+                                SLICE_HEADER_ALPHAC0_BETA_OFFSET_MAX, "slice_beta_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                    ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
+    }
+  }
+
+  bSgChangeCycleInvolved = (pPps->uiNumSliceGroups > 1 && pPps->uiSliceGroupMapType >= 3
+                            && pPps->uiSliceGroupMapType <= 5);
+  if (kbExtensionFlag && bSgChangeCycleInvolved)
+    bSgChangeCycleInvolved = (bSgChangeCycleInvolved && (uiQualityId == BASE_QUALITY_ID));
+  if (bSgChangeCycleInvolved) {
+    if (pPps->uiSliceGroupChangeRate > 0) {
+      // 7.4.3: slice_group_change_cycle uses Ceil (Log2 (PicSizeInMapUnits / SliceGroupChangeRate + 1)) bits
+      int32_t iNumBits = 0; // smallest n (capped at 32) with Rate * 2^n >= PicSizeInMapUnits + Rate
+      while (iNumBits < 32 && ((uint64_t)pPps->uiSliceGroupChangeRate << iNumBits) <
+             (uint64_t)pPps->uiPicSizeInMapUnits + pPps->uiSliceGroupChangeRate)
+        ++iNumBits;
+      WELS_READ_VERIFY (BsGetBits (pBs, iNumBits, &uiCode)); //slice_group_change_cycle
+      pSliceHead->iSliceGroupChangeCycle = uiCode;
+    } else
+      pSliceHead->iSliceGroupChangeCycle = 0;
+  }
+
+  if (!kbExtensionFlag) {
+    FillDefaultSliceHeaderExt (pSliceHeadExt, pNalHeaderExt);
+  } else {
+    /* Extra syntax elements newly introduced */
+    pSliceHeadExt->pSubsetSps = pSubsetSps;
+
+    if (!pNalHeaderExt->iNoInterLayerPredFlag && BASE_QUALITY_ID == uiQualityId) {
+      //the following should be deleted for CODE_CLEAN
+      WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //ref_layer_dq_id
+      pSliceHeadExt->uiRefLayerDqId = uiCode;
+      if (pSubsetSps->sSpsSvcExt.bInterLayerDeblockingFilterCtrlPresentFlag) {
+        WELS_READ_VERIFY (BsGetUe (pBs, &uiCode)); //disable_inter_layer_deblocking_filter_idc
+        pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc = uiCode;
+        //refer to JVT-X201wcm1.doc G.7.4.3.4--2010.4.20
+        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc > 6) {
+          WelsLog (pLogCtx, WELS_LOG_WARNING, "disable_inter_layer_deblock_filter_idc (%d) out of range [0, 6]",
+                   pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc);
+          return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_DBLOCKING_IDC);
+        }
+        if (pSliceHeadExt->uiDisableInterLayerDeblockingFilterIdc != 1) {
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_alpha_c0_offset_div2
+          pSliceHeadExt->iInterLayerSliceAlphaC0Offset = iCode * 2;
+          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceAlphaC0Offset,
+                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX,
+                                    "inter_layer_alpha_c0_offset_div2 * 2", GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER,
+                                        ERR_INFO_INVALID_SLICE_ALPHA_C0_OFFSET_DIV2));
+          WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //inter_layer_slice_beta_offset_div2
+          pSliceHeadExt->iInterLayerSliceBetaOffset = iCode * 2;
+          WELS_CHECK_SE_BOTH_ERROR (pSliceHeadExt->iInterLayerSliceBetaOffset, SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MIN,
+                                    SLICE_HEADER_INTER_LAYER_ALPHAC0_BETA_OFFSET_MAX, "inter_layer_slice_beta_offset_div2 * 2",
+                                    GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_INVALID_SLICE_BETA_OFFSET_DIV2));
+        }
+      }
+
+      pSliceHeadExt->uiRefLayerChromaPhaseXPlus1Flag = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseXPlus1Flag;
+      pSliceHeadExt->uiRefLayerChromaPhaseYPlus1     = pSubsetSps->sSpsSvcExt.uiSeqRefLayerChromaPhaseYPlus1;
+
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //constrained_intra_resampling_flag
+      pSliceHeadExt->bConstrainedIntraResamplingFlag = !!uiCode;
+
+      {
+        SPosOffset pos;
+        pos.iLeftOffset   = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iLeftOffset;
+        pos.iTopOffset    = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iTopOffset * (2 - pSps->bFrameMbsOnlyFlag);
+        pos.iRightOffset  = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iRightOffset;
+        pos.iBottomOffset = pSubsetSps->sSpsSvcExt.sSeqScaledRefLayer.iBottomOffset * (2 - pSps->bFrameMbsOnlyFlag);
+        //memcpy(&pSliceHeadExt->sScaledRefLayer, &pos, sizeof(SPosOffset));//confirmed_safe_unsafe_usage
+        pSliceHeadExt->iScaledRefLayerPicWidthInSampleLuma  = (pSliceHead->iMbWidth << 4) -
+            (pos.iLeftOffset + pos.iRightOffset);
+        pSliceHeadExt->iScaledRefLayerPicHeightInSampleLuma = (pSliceHead->iMbHeight << 4) -
+            (pos.iTopOffset + pos.iBottomOffset) / (1 + pSliceHead->bFieldPicFlag);
+      }
+    } else if (uiQualityId > BASE_QUALITY_ID) {
+      WelsLog (pLogCtx, WELS_LOG_WARNING, "MGS not supported.");
+      return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
+    } else {
+      pSliceHeadExt->uiRefLayerDqId = (uint8_t) - 1;
+    }
+
+    pSliceHeadExt->bSliceSkipFlag            = false;
+    pSliceHeadExt->bAdaptiveBaseModeFlag     = false;
+    pSliceHeadExt->bDefaultBaseModeFlag      = false;
+    pSliceHeadExt->bAdaptiveMotionPredFlag   = false;
+    pSliceHeadExt->bDefaultMotionPredFlag    = false;
+    pSliceHeadExt->bAdaptiveResidualPredFlag = false;
+    pSliceHeadExt->bDefaultResidualPredFlag  = false;
+    if (pNalHeaderExt->iNoInterLayerPredFlag)
+      pSliceHeadExt->bTCoeffLevelPredFlag    = false;
+    else
+      pSliceHeadExt->bTCoeffLevelPredFlag    = pSubsetSps->sSpsSvcExt.bSeqTCoeffLevelPredFlag;
+
+    if (!pNalHeaderExt->iNoInterLayerPredFlag) {
+      WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //slice_skip_flag
+      pSliceHeadExt->bSliceSkipFlag = !!uiCode;
+      if (pSliceHeadExt->bSliceSkipFlag) {
+        WelsLog (pLogCtx, WELS_LOG_WARNING, "bSliceSkipFlag == 1 not supported.");
+        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_SLICESKIP);
+      } else {
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_base_mode_flag
+        pSliceHeadExt->bAdaptiveBaseModeFlag = !!uiCode;
+        if (!pSliceHeadExt->bAdaptiveBaseModeFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_base_mode_flag
+          pSliceHeadExt->bDefaultBaseModeFlag = !!uiCode;
+        }
+        if (!pSliceHeadExt->bDefaultBaseModeFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_motion_prediction_flag
+          pSliceHeadExt->bAdaptiveMotionPredFlag = !!uiCode;
+          if (!pSliceHeadExt->bAdaptiveMotionPredFlag) {
+            WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_motion_prediction_flag
+            pSliceHeadExt->bDefaultMotionPredFlag = !!uiCode;
+          }
+        }
+
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //adaptive_residual_prediction_flag
+        pSliceHeadExt->bAdaptiveResidualPredFlag = !!uiCode;
+        if (!pSliceHeadExt->bAdaptiveResidualPredFlag) {
+          WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //default_residual_prediction_flag
+          pSliceHeadExt->bDefaultResidualPredFlag = !!uiCode;
+        }
+      }
+      if (pSubsetSps->sSpsSvcExt.bAdaptiveTCoeffLevelPredFlag) {
+        WELS_READ_VERIFY (BsGetOneBit (pBs, &uiCode)); //tcoeff_level_prediction_flag
+        pSliceHeadExt->bTCoeffLevelPredFlag = !!uiCode;
+      }
+    }
+
+    if (!pSubsetSps->sSpsSvcExt.bSliceHeaderRestrictionFlag) {
+      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_start
+      pSliceHeadExt->uiScanIdxStart = uiCode;
+      WELS_READ_VERIFY (BsGetBits (pBs, 4, &uiCode)); //scan_idx_end
+      pSliceHeadExt->uiScanIdxEnd = uiCode;
+      if (pSliceHeadExt->uiScanIdxStart != 0 || pSliceHeadExt->uiScanIdxEnd != 15) {
+        WelsLog (pLogCtx, WELS_LOG_WARNING, "uiScanIdxStart (%d) != 0 and uiScanIdxEnd (%d) !=15 not supported here",
+                 pSliceHeadExt->uiScanIdxStart, pSliceHeadExt->uiScanIdxEnd);
+        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_UNSUPPORTED_MGS);
+      }
+    } else {
+      pSliceHeadExt->uiScanIdxStart = 0;
+      pSliceHeadExt->uiScanIdxEnd   = 15;
+    }
+  }
+
+  return ERR_NONE;
+}
+
+/*
+ *  Copy the NAL unit header extension fields, bStoreRefBasePicFlag and sRefPicBaseMarking of a prefix NAL unit
+ *  (kpSrc) into the VCL NAL unit that follows it (kppDst), then fill in iShortFrameNum of the copied
+ *  MMCO_SHORT2UNUSED entries. Returns false if either NAL pointer is NULL, true otherwise.
+ */
+bool PrefetchNalHeaderExtSyntax (PWelsDecoderContext pCtx, PNalUnit const kppDst, PNalUnit const kpSrc) {
+  if (kpSrc == NULL || kppDst == NULL)
+    return false;
+
+  PNalUnitHeaderExt pDstHdr   = &kppDst->sNalHeaderExt;
+  PNalUnitHeaderExt pSrcHdr   = &kpSrc->sNalHeaderExt;
+  PSliceHeaderExt pDstShExt   = &kppDst->sNalData.sVclNal.sSliceHeaderExt;
+  PPrefixNalUnit pSrcPrefix   = &kpSrc->sNalData.sPrefixNal;
+  // SPS reached through the PPS id stored in the destination slice header.
+  PSps pSps                   = &pCtx->sSpsBuffer[pCtx->sPpsBuffer[pDstShExt->sSliceHeader.iPpsId].iSpsId];
+
+  // Header-extension fields taken over from the prefix NAL.
+  pDstHdr->bIdrFlag              = pSrcHdr->bIdrFlag;
+  pDstHdr->uiPriorityId          = pSrcHdr->uiPriorityId;
+  pDstHdr->iNoInterLayerPredFlag = pSrcHdr->iNoInterLayerPredFlag;
+  pDstHdr->uiDependencyId        = pSrcHdr->uiDependencyId;
+  pDstHdr->uiQualityId           = pSrcHdr->uiQualityId;
+  pDstHdr->uiTemporalId          = pSrcHdr->uiTemporalId;
+  pDstHdr->bUseRefBasePicFlag    = pSrcHdr->bUseRefBasePicFlag;
+  pDstHdr->bDiscardableFlag      = pSrcHdr->bDiscardableFlag;
+  pDstHdr->bOutputFlag           = pSrcHdr->bOutputFlag;
+  pDstHdr->uiLayerDqId           = pSrcHdr->uiLayerDqId;
+
+  // Reference base picture marking state of the prefix NAL.
+  pDstShExt->bStoreRefBasePicFlag = pSrcPrefix->bStoreRefBasePicFlag;
+  memcpy (&pDstShExt->sRefBasePicMarking, &pSrcPrefix->sRefPicBaseMarking,
+          sizeof (SRefBasePicMarking)); //confirmed_safe_unsafe_usage
+
+  PRefBasePicMarking pMarking = &pDstShExt->sRefBasePicMarking;
+  if (!pMarking->bAdaptiveRefBasePicMarkingModeFlag)
+    return true;
+
+  // Walk the copied operations until MMCO_END or until MAX_MMCO_COUNT entries have been visited.
+  for (int32_t iMmcoIdx = 0; pMarking->mmco_base[iMmcoIdx].uiMmcoType != MMCO_END;) {
+    if (pMarking->mmco_base[iMmcoIdx].uiMmcoType == MMCO_SHORT2UNUSED)
+      // frame_num minus the coded difference, wrapped to uiLog2MaxFrameNum bits
+      pMarking->mmco_base[iMmcoIdx].iShortFrameNum = (pDstShExt->sSliceHeader.iFrameNum -
+          pMarking->mmco_base[iMmcoIdx].uiDiffOfPicNums) & ((1 << pSps->uiLog2MaxFrameNum) - 1);
+    if (++iMmcoIdx >= MAX_MMCO_COUNT)
+      break;
+  }
+
+  return true;
+}
+
+
+
+int32_t UpdateAccessUnit (PWelsDecoderContext pCtx) {
+  PAccessUnit pAu        = pCtx->pAccessUnitList;
+  const int32_t kiEndIdx = pAu->uiEndPos;
+
+  // The NAL at uiEndPos supplies the target DQ id; the access unit is then marked complete.
+  pCtx->uiTargetDqId     = pAu->pNalUnitsList[kiEndIdx]->sNalHeaderExt.uiLayerDqId;
+  pAu->uiActualUnitsNum  = kiEndIdx + 1;
+  pAu->bCompletedAuFlag  = true;
+
+  // Added for mosaic avoidance, 11/19/2009
+#ifdef LONG_TERM_REF
+  const bool kbNeedIdr = pCtx->bParamSetsLostFlag || pCtx->bNewSeqBegin;
+#else
+  const bool kbNeedIdr = pCtx->bReferenceLostAtT0Flag || pCtx->bNewSeqBegin;
+#endif
+  if (!kbNeedIdr)
+    return ERR_NONE;
+
+  // Search the access unit for an IDR NAL, identified by NAL type or by the header-extension IDR flag.
+  bool bIdrFound = false;
+  for (uint32_t uiIdx = 0; uiIdx < pAu->uiActualUnitsNum && !bIdrFound; ++uiIdx) {
+    PNalUnit pNal = pAu->pNalUnitsList[uiIdx];
+    bIdrFound = (pNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR)
+                || pNal->sNalHeaderExt.bIdrFlag;
+  }
+  if (bIdrFound)
+    return ERR_NONE;
+
+  // no found IDR nal within incoming AU, need exit to avoid mosaic issue, 11/19/2009
+  pCtx->sDecoderStatistics.uiIDRLostNum++;
+  if (!pCtx->bParamSetsLostFlag)
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+             "UpdateAccessUnit():::::Key frame lost.....CAN NOT find IDR from current AU.");
+  pCtx->iErrorCode |= dsRefLost;
+
+  // Only the ERROR_CON_DISABLE setting turns the missing IDR into a returned error.
+  if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE)
+    return ERR_NONE;
+
+#ifdef LONG_TERM_REF
+  pCtx->iErrorCode |= dsNoParamSets;
+  return dsNoParamSets;
+#else
+  pCtx->iErrorCode |= dsRefLost;
+  return ERR_INFO_REFERENCE_PIC_LOST;
+#endif
+}
+
+/*!
+ * \brief   Allocate the DQ layer descriptors and the per-macroblock side buffers of pCtx->sMb
+ *          for LAYER_NUM_EXCHANGEABLE layers.
+ * \param   pCtx         decoder context that owns the buffers (must not be NULL)
+ * \param   kiMaxWidth   requested picture width in pixels (must be > 0)
+ * \param   kiMaxHeight  requested picture height in pixels (must be > 0)
+ * \return  ERR_NONE on success (including the case where the existing buffers are reused),
+ *          ERR_INFO_INVALID_PARAM for bad arguments, ERR_INFO_OUT_OF_MEMORY if any allocation fails.
+ * \note    On an allocation failure the buffers obtained so far stay registered in pCtx
+ *          (pDqLayersList[i] is stored before the per-MB buffers are requested), so a later
+ *          UninitialDqLayersContext() call can release them.
+ */
+int32_t InitialDqLayersContext (PWelsDecoderContext pCtx, const int32_t kiMaxWidth, const int32_t kiMaxHeight) {
+  int32_t i = 0;
+
+  WELS_VERIFY_RETURN_IF (ERR_INFO_INVALID_PARAM, (NULL == pCtx || kiMaxWidth <= 0 || kiMaxHeight <= 0))
+  // picture size rounded up to whole 16x16 macroblocks
+  pCtx->sMb.iMbWidth  = (kiMaxWidth + 15) >> 4;
+  pCtx->sMb.iMbHeight = (kiMaxHeight + 15) >> 4;
+
+  // buffers already allocated for a picture at least this large: nothing to do
+  if (pCtx->bInitialDqLayersMem && kiMaxWidth <= pCtx->iPicWidthReq
+      && kiMaxHeight <= pCtx->iPicHeightReq)
+    return ERR_NONE;
+
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  UninitialDqLayersContext (pCtx);
+
+  // number of macroblocks per picture; every per-MB buffer below is a multiple of this
+  const int32_t kiMbNum = pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight;
+
+  do {
+    PDqLayer pDq = (PDqLayer)pMa->WelsMallocz (sizeof (SDqLayer), "PDqLayer");
+
+    if (pDq == NULL)
+      return ERR_INFO_OUT_OF_MEMORY;
+
+    pCtx->pDqLayersList[i] = pDq; // registered first so UninitialDqLayersContext() can find this layer
+    memset (pDq, 0, sizeof (SDqLayer)); // NOTE(review): presumably redundant if WelsMallocz zero-fills - confirm
+
+    pCtx->sMb.pMbType[i] = (uint32_t*)pMa->WelsMallocz (kiMbNum * sizeof (uint32_t), "pCtx->sMb.pMbType[]");
+    pCtx->sMb.pMv[i][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM,
+                               "pCtx->sMb.pMv[][]");
+    pCtx->sMb.pMv[i][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM,
+                               "pCtx->sMb.pMv[][]");
+
+    pCtx->sMb.pRefIndex[i][LIST_0] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (kiMbNum * sizeof (
+                                       int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
+    pCtx->sMb.pRefIndex[i][LIST_1] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (kiMbNum * sizeof (
+                                       int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[][]");
+    pCtx->sMb.pDirect[i] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * MB_BLOCK4x4_NUM,
+                           "pCtx->sMb.pDirect[]");
+    pCtx->sMb.pLumaQp[i] = (int8_t*)pMa->WelsMallocz (kiMbNum * sizeof (int8_t), "pCtx->sMb.pLumaQp[]");
+    pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = (bool*)pMa->WelsMallocz (kiMbNum * sizeof (bool),
+        "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
+    pCtx->sMb.pTransformSize8x8Flag[i] = (bool*)pMa->WelsMallocz (kiMbNum * sizeof (bool),
+                                         "pCtx->sMb.pTransformSize8x8Flag[]");
+    pCtx->sMb.pChromaQp[i] = (int8_t (*)[2])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 2, "pCtx->sMb.pChromaQp[]");
+    pCtx->sMb.pMvd[i][LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM,
+                                "pCtx->sMb.pMvd[][]");
+    pCtx->sMb.pMvd[i][LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (kiMbNum * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM,
+                                "pCtx->sMb.pMvd[][]");
+    pCtx->sMb.pCbfDc[i] = (uint16_t*)pMa->WelsMallocz (kiMbNum * sizeof (uint16_t), "pCtx->sMb.pCbfDc[]");
+    pCtx->sMb.pNzc[i] = (int8_t (*)[24])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 24, "pCtx->sMb.pNzc[]");
+    pCtx->sMb.pNzcRs[i] = (int8_t (*)[24])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 24, "pCtx->sMb.pNzcRs[]");
+    pCtx->sMb.pScaledTCoeff[i] = (int16_t (*)[MB_COEFF_LIST_SIZE])pMa->WelsMallocz (kiMbNum * sizeof (
+                                   int16_t) * MB_COEFF_LIST_SIZE, "pCtx->sMb.pScaledTCoeff[]");
+    pCtx->sMb.pIntraPredMode[i] = (int8_t (*)[8])pMa->WelsMallocz (kiMbNum * sizeof (int8_t) * 8,
+                                  "pCtx->sMb.pIntraPredMode[]");
+    pCtx->sMb.pIntra4x4FinalMode[i] = (int8_t (*)[MB_BLOCK4x4_NUM])pMa->WelsMallocz (kiMbNum * sizeof (
+                                        int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pIntra4x4FinalMode[]");
+    // element type matches the uint8_t destination pointer
+    pCtx->sMb.pIntraNxNAvailFlag[i] = (uint8_t (*))pMa->WelsMallocz (kiMbNum * sizeof (uint8_t),
+                                      "pCtx->sMb.pIntraNxNAvailFlag");
+    pCtx->sMb.pChromaPredMode[i] = (int8_t*)pMa->WelsMallocz (kiMbNum * sizeof (int8_t), "pCtx->sMb.pChromaPredMode[]");
+    pCtx->sMb.pCbp[i] = (int8_t*)pMa->WelsMallocz (kiMbNum * sizeof (int8_t), "pCtx->sMb.pCbp[]");
+    pCtx->sMb.pSubMbType[i] = (uint32_t (*)[MB_PARTITION_SIZE])pMa->WelsMallocz (kiMbNum * sizeof (
+                                uint32_t) * MB_PARTITION_SIZE, "pCtx->sMb.pSubMbType[]");
+    pCtx->sMb.pSliceIdc[i] = (int32_t*) pMa->WelsMallocz (kiMbNum * sizeof (int32_t),
+                             "pCtx->sMb.pSliceIdc[]"); // slice_idc is stored as int32_t
+    pCtx->sMb.pResidualPredFlag[i] = (int8_t*) pMa->WelsMallocz (kiMbNum * sizeof (int8_t),
+                                     "pCtx->sMb.pResidualPredFlag[]");
+    pCtx->sMb.pInterPredictionDoneFlag[i] = (int8_t*) pMa->WelsMallocz (kiMbNum * sizeof (int8_t),
+                                            "pCtx->sMb.pInterPredictionDoneFlag[]");
+
+    pCtx->sMb.pMbCorrectlyDecodedFlag[i] = (bool*) pMa->WelsMallocz (kiMbNum * sizeof (bool),
+                                           "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
+    // tag now matches the one used when this buffer is released
+    pCtx->sMb.pMbRefConcealedFlag[i] = (bool*) pMa->WelsMallocz (kiMbNum * sizeof (bool),
+                                       "pCtx->sMb.pMbRefConcealedFlag[]");
+
+    // bail out if any of the allocations above failed
+    WELS_VERIFY_RETURN_IF (ERR_INFO_OUT_OF_MEMORY,
+                           ((NULL == pCtx->sMb.pMbType[i]) ||
+                            (NULL == pCtx->sMb.pMv[i][LIST_0]) ||
+                            (NULL == pCtx->sMb.pMv[i][LIST_1]) ||
+                            (NULL == pCtx->sMb.pRefIndex[i][LIST_0]) ||
+                            (NULL == pCtx->sMb.pRefIndex[i][LIST_1]) ||
+                            (NULL == pCtx->sMb.pDirect[i]) ||
+                            (NULL == pCtx->sMb.pLumaQp[i]) ||
+                            (NULL == pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) ||
+                            (NULL == pCtx->sMb.pTransformSize8x8Flag[i]) ||
+                            (NULL == pCtx->sMb.pChromaQp[i]) ||
+                            (NULL == pCtx->sMb.pMvd[i][LIST_0]) ||
+                            (NULL == pCtx->sMb.pMvd[i][LIST_1]) ||
+                            (NULL == pCtx->sMb.pCbfDc[i]) ||
+                            (NULL == pCtx->sMb.pNzc[i]) ||
+                            (NULL == pCtx->sMb.pNzcRs[i]) ||
+                            (NULL == pCtx->sMb.pScaledTCoeff[i]) ||
+                            (NULL == pCtx->sMb.pIntraPredMode[i]) ||
+                            (NULL == pCtx->sMb.pIntra4x4FinalMode[i]) ||
+                            (NULL == pCtx->sMb.pIntraNxNAvailFlag[i]) ||
+                            (NULL == pCtx->sMb.pChromaPredMode[i]) ||
+                            (NULL == pCtx->sMb.pCbp[i]) ||
+                            (NULL == pCtx->sMb.pSubMbType[i]) ||
+                            (NULL == pCtx->sMb.pSliceIdc[i]) ||
+                            (NULL == pCtx->sMb.pResidualPredFlag[i]) ||
+                            (NULL == pCtx->sMb.pInterPredictionDoneFlag[i]) ||
+                            (NULL == pCtx->sMb.pMbRefConcealedFlag[i]) ||
+                            (NULL == pCtx->sMb.pMbCorrectlyDecodedFlag[i])
+                           )
+                          )
+
+    // every byte 0xff, i.e. each int32_t slice_idc entry starts out as -1
+    memset (pCtx->sMb.pSliceIdc[i], 0xff, (kiMbNum * sizeof (int32_t)));
+
+    ++ i;
+  } while (i < LAYER_NUM_EXCHANGEABLE);
+
+  // remember what the buffers were sized for so later calls can reuse them
+  pCtx->bInitialDqLayersMem     = true;
+  pCtx->iPicWidthReq            = kiMaxWidth;
+  pCtx->iPicHeightReq           = kiMaxHeight;
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Release every DQ layer descriptor and per-macroblock side buffer held by pCtx
+ *          and mark the DQ layer memory as not initialized.
+ * \param   pCtx  decoder context whose buffers are released (dereferenced without a NULL check)
+ * \note    A layer slot whose pDqLayersList entry is NULL is skipped entirely, including
+ *          its pCtx->sMb buffers.
+ */
+void UninitialDqLayersContext (PWelsDecoderContext pCtx) {
+  CMemoryAlign* pMa = pCtx->pMemAlign;
+
+  for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++i) {
+    PDqLayer pDq = pCtx->pDqLayersList[i];
+    if (pDq == NULL)
+      continue;
+
+    if (pCtx->sMb.pMbType[i]) {
+      pMa->WelsFree (pCtx->sMb.pMbType[i], "pCtx->sMb.pMbType[]");
+      pCtx->sMb.pMbType[i] = NULL;
+    }
+
+    // buffers that exist once per reference list
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (pCtx->sMb.pMv[i][listIdx]) {
+        pMa->WelsFree (pCtx->sMb.pMv[i][listIdx], "pCtx->sMb.pMv[][]");
+        pCtx->sMb.pMv[i][listIdx] = NULL;
+      }
+
+      if (pCtx->sMb.pRefIndex[i][listIdx]) {
+        pMa->WelsFree (pCtx->sMb.pRefIndex[i][listIdx], "pCtx->sMb.pRefIndex[][]");
+        pCtx->sMb.pRefIndex[i][listIdx] = NULL;
+      }
+
+      if (pCtx->sMb.pMvd[i][listIdx]) {
+        pMa->WelsFree (pCtx->sMb.pMvd[i][listIdx], "pCtx->sMb.pMvd[][]");
+        pCtx->sMb.pMvd[i][listIdx] = NULL;
+      }
+    }
+
+    // pDirect is per layer, not per list, so it is released once outside the list loop
+    if (pCtx->sMb.pDirect[i]) {
+      pMa->WelsFree (pCtx->sMb.pDirect[i], "pCtx->sMb.pDirect[]");
+      pCtx->sMb.pDirect[i] = NULL;
+    }
+
+    if (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i]) {
+      pMa->WelsFree (pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i], "pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[]");
+      pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[i] = NULL;
+    }
+
+    if (pCtx->sMb.pTransformSize8x8Flag[i]) {
+      pMa->WelsFree (pCtx->sMb.pTransformSize8x8Flag[i], "pCtx->sMb.pTransformSize8x8Flag[]");
+      pCtx->sMb.pTransformSize8x8Flag[i] = NULL;
+    }
+
+    if (pCtx->sMb.pLumaQp[i]) {
+      pMa->WelsFree (pCtx->sMb.pLumaQp[i], "pCtx->sMb.pLumaQp[]");
+      pCtx->sMb.pLumaQp[i] = NULL;
+    }
+
+    if (pCtx->sMb.pChromaQp[i]) {
+      pMa->WelsFree (pCtx->sMb.pChromaQp[i], "pCtx->sMb.pChromaQp[]");
+      pCtx->sMb.pChromaQp[i] = NULL;
+    }
+
+    if (pCtx->sMb.pCbfDc[i]) {
+      pMa->WelsFree (pCtx->sMb.pCbfDc[i], "pCtx->sMb.pCbfDc[]");
+      pCtx->sMb.pCbfDc[i] = NULL;
+    }
+
+    if (pCtx->sMb.pNzc[i]) {
+      pMa->WelsFree (pCtx->sMb.pNzc[i], "pCtx->sMb.pNzc[]");
+      pCtx->sMb.pNzc[i] = NULL;
+    }
+
+    if (pCtx->sMb.pNzcRs[i]) {
+      pMa->WelsFree (pCtx->sMb.pNzcRs[i], "pCtx->sMb.pNzcRs[]");
+      pCtx->sMb.pNzcRs[i] = NULL;
+    }
+
+    if (pCtx->sMb.pScaledTCoeff[i]) {
+      pMa->WelsFree (pCtx->sMb.pScaledTCoeff[i], "pCtx->sMb.pScaledTCoeff[]");
+      pCtx->sMb.pScaledTCoeff[i] = NULL;
+    }
+
+    if (pCtx->sMb.pIntraPredMode[i]) {
+      pMa->WelsFree (pCtx->sMb.pIntraPredMode[i], "pCtx->sMb.pIntraPredMode[]");
+      pCtx->sMb.pIntraPredMode[i] = NULL;
+    }
+
+    if (pCtx->sMb.pIntra4x4FinalMode[i]) {
+      pMa->WelsFree (pCtx->sMb.pIntra4x4FinalMode[i], "pCtx->sMb.pIntra4x4FinalMode[]");
+      pCtx->sMb.pIntra4x4FinalMode[i] = NULL;
+    }
+
+    if (pCtx->sMb.pIntraNxNAvailFlag[i]) {
+      pMa->WelsFree (pCtx->sMb.pIntraNxNAvailFlag[i], "pCtx->sMb.pIntraNxNAvailFlag");
+      pCtx->sMb.pIntraNxNAvailFlag[i] = NULL;
+    }
+
+    if (pCtx->sMb.pChromaPredMode[i]) {
+      pMa->WelsFree (pCtx->sMb.pChromaPredMode[i], "pCtx->sMb.pChromaPredMode[]");
+      pCtx->sMb.pChromaPredMode[i] = NULL;
+    }
+
+    if (pCtx->sMb.pCbp[i]) {
+      pMa->WelsFree (pCtx->sMb.pCbp[i], "pCtx->sMb.pCbp[]");
+      pCtx->sMb.pCbp[i] = NULL;
+    }
+
+    if (pCtx->sMb.pSubMbType[i]) {
+      pMa->WelsFree (pCtx->sMb.pSubMbType[i], "pCtx->sMb.pSubMbType[]");
+      pCtx->sMb.pSubMbType[i] = NULL;
+    }
+
+    if (pCtx->sMb.pSliceIdc[i]) {
+      pMa->WelsFree (pCtx->sMb.pSliceIdc[i], "pCtx->sMb.pSliceIdc[]");
+      pCtx->sMb.pSliceIdc[i] = NULL;
+    }
+
+    if (pCtx->sMb.pResidualPredFlag[i]) {
+      pMa->WelsFree (pCtx->sMb.pResidualPredFlag[i], "pCtx->sMb.pResidualPredFlag[]");
+      pCtx->sMb.pResidualPredFlag[i] = NULL;
+    }
+
+    if (pCtx->sMb.pInterPredictionDoneFlag[i]) {
+      pMa->WelsFree (pCtx->sMb.pInterPredictionDoneFlag[i], "pCtx->sMb.pInterPredictionDoneFlag[]");
+      pCtx->sMb.pInterPredictionDoneFlag[i] = NULL;
+    }
+
+    if (pCtx->sMb.pMbCorrectlyDecodedFlag[i]) {
+      pMa->WelsFree (pCtx->sMb.pMbCorrectlyDecodedFlag[i], "pCtx->sMb.pMbCorrectlyDecodedFlag[]");
+      pCtx->sMb.pMbCorrectlyDecodedFlag[i] = NULL;
+    }
+
+    if (pCtx->sMb.pMbRefConcealedFlag[i]) {
+      pMa->WelsFree (pCtx->sMb.pMbRefConcealedFlag[i], "pCtx->sMb.pMbRefConcealedFlag[]");
+      pCtx->sMb.pMbRefConcealedFlag[i] = NULL;
+    }
+
+    // finally the layer descriptor itself
+    pMa->WelsFree (pDq, "pDq");
+    pCtx->pDqLayersList[i] = NULL;
+  }
+
+  // forget the dimensions the buffers were sized for
+  pCtx->iPicWidthReq            = 0;
+  pCtx->iPicHeightReq           = 0;
+  pCtx->bInitialDqLayersMem     = false;
+}
+
+/*!
+ * \brief   Reset the current access unit after it has been processed: clear the position
+ *          markers and move the NAL units that were not part of it to the head of the list.
+ * \param   pCtx  decoder context whose pAccessUnitList is reset
+ */
+void ResetCurrentAccessUnit (PWelsDecoderContext pCtx) {
+  PAccessUnit pAu = pCtx->pAccessUnitList;
+
+  pAu->uiStartPos       = 0;
+  pAu->uiEndPos         = 0;
+  pAu->bCompletedAuFlag = false;
+
+  if (pAu->uiActualUnitsNum > 0) {
+    const uint32_t kuiConsumedNum  = pAu->uiActualUnitsNum;
+    // units present in the list beyond the ones of the finished access unit
+    const uint32_t kuiRemainingNum = pAu->uiAvailUnitsNum - kuiConsumedNum;
+
+    // exchange each remaining node with the node at the matching position at the list head
+    for (uint32_t uiHead = 0; uiHead < kuiRemainingNum; ++uiHead) {
+      const uint32_t kuiTail = kuiConsumedNum + uiHead;
+      PNalUnit pSwap = pAu->pNalUnitsList[kuiTail];
+      pAu->pNalUnitsList[kuiTail] = pAu->pNalUnitsList[uiHead];
+      pAu->pNalUnitsList[uiHead]  = pSwap;
+    }
+
+    pAu->uiAvailUnitsNum  = kuiRemainingNum;
+    pAu->uiActualUnitsNum = kuiRemainingNum;
+  }
+}
+
+/*!
+ * \brief   Force-reset the NAL list of the current access unit, e.g. after a parsing or
+ *          decoding error: the units up to and including uiEndPos are discarded and the
+ *          units that follow are swapped to the front of the list.
+ * \param   pAu  access unit to reset
+ */
+void ForceResetCurrentAccessUnit (PAccessUnit pAu) {
+  // number of list entries given up: everything up to and including uiEndPos
+  const uint32_t kuiDiscardNum = pAu->uiEndPos + 1;
+
+  // bring the nodes of the succeeding access unit to the head of the list
+  for (uint32_t uiSrc = kuiDiscardNum, uiDst = 0; uiSrc < pAu->uiAvailUnitsNum; ++uiSrc, ++uiDst) {
+    PNalUnit pSwap = pAu->pNalUnitsList[uiSrc];
+    pAu->pNalUnitsList[uiSrc] = pAu->pNalUnitsList[uiDst];
+    pAu->pNalUnitsList[uiDst] = pSwap;
+  }
+
+  // shrink the available count by the discarded entries, never going below zero
+  if (pAu->uiAvailUnitsNum > pAu->uiEndPos) {
+    pAu->uiAvailUnitsNum -= kuiDiscardNum;
+  } else {
+    pAu->uiAvailUnitsNum = 0;
+  }
+
+  pAu->bCompletedAuFlag = false;
+  pAu->uiActualUnitsNum = 0;
+  pAu->uiStartPos       = 0;
+  pAu->uiEndPos         = 0;
+}
+
+// Drop the most recently added (corrupted) NAL unit from pNalUnitsList by decrementing
+// the available-unit count; does nothing when the list is already empty.
+void ForceClearCurrentNal (PAccessUnit pAu) {
+  if (pAu->uiAvailUnitsNum > 0) {
+    pAu->uiAvailUnitsNum -= 1;
+  }
+}
+
+// Clear the "parameter set seen" flags (SPS, subset SPS, PPS) and empty the access unit list.
+void ForceResetParaSetStatusAndAUList (PWelsDecoderContext pCtx) {
+  pCtx->bSpsExistAheadFlag    = false;
+  pCtx->bSubspsExistAheadFlag = false;
+  pCtx->bPpsExistAheadFlag    = false;
+
+  // empty the access unit list: no units, positions rewound, not completed
+  PAccessUnit pAu = pCtx->pAccessUnitList;
+  pAu->uiAvailUnitsNum  = 0;
+  pAu->uiActualUnitsNum = 0;
+  pAu->uiStartPos       = 0;
+  pAu->uiEndPos         = 0;
+  pAu->bCompletedAuFlag = false;
+}
+
+/*!
+ * \brief   Walk pNalUnitsList forwards from iStartIdx to iEndIdx and find the last NAL unit
+ *          up to which the layer chain is unbroken. The result is stored in
+ *          pCurAu->uiEndPos, and pCtx->uiTargetDqId is set to that unit's uiLayerDqId.
+ * \param   pCtx       decoder context owning the access unit list
+ * \param   iStartIdx  index of the starting NAL unit
+ * \param   iEndIdx    last index (inclusive) to examine
+ *
+ * A unit continues the chain when it has the same dependency id as its predecessor, or
+ * when it opens a new dependency layer with quality id 0 whose uiRefLayerDqId equals the
+ * uiLayerDqId of the previous unit.
+ */
+void CheckAvailNalUnitsListContinuity (PWelsDecoderContext pCtx, int32_t iStartIdx, int32_t iEndIdx) {
+  PAccessUnit pAu = pCtx->pAccessUnitList;
+
+  // state of the last unit accepted into the chain, seeded from the starting unit
+  uint8_t uiPrevDependencyId = pAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiDependencyId;
+  uint8_t uiPrevLayerDqId    = pAu->pNalUnitsList[iStartIdx]->sNalHeaderExt.uiLayerDqId;
+
+  int32_t iIdx;
+  for (iIdx = iStartIdx + 1; iIdx <= iEndIdx; ++iIdx) {
+    PNalUnit pNal = pAu->pNalUnitsList[iIdx];
+    const uint8_t kuiDependencyId = pNal->sNalHeaderExt.uiDependencyId;
+    const uint8_t kuiQualityId    = pNal->sNalHeaderExt.uiQualityId;
+    const uint8_t kuiLayerDqId    = pNal->sNalHeaderExt.uiLayerDqId;
+    const uint8_t kuiRefLayerDqId = pNal->sNalData.sVclNal.sSliceHeaderExt.uiRefLayerDqId;
+
+    if (kuiDependencyId != uiPrevDependencyId) {
+      // a new dependency layer must start with its base quality layer ...
+      if (kuiQualityId != 0)
+        break;
+      // ... and must reference the layer we have just seen, otherwise the chain is broken here
+      if (kuiRefLayerDqId != uiPrevLayerDqId)
+        break;
+      uiPrevDependencyId = kuiDependencyId;
+    }
+    uiPrevLayerDqId = kuiLayerDqId;
+  }
+
+  // iIdx is one past the last accepted unit
+  const int32_t kiLastValidIdx = iIdx - 1;
+  pAu->uiEndPos = kiLastValidIdx;
+  pCtx->uiTargetDqId = pAu->pNalUnitsList[kiLastValidIdx]->sNalHeaderExt.uiLayerDqId;
+}
+
+// Multi-slice support: starting from *pIdxNoInterLayerPred, look backwards for further
+// NAL units flagged iNoInterLayerPredFlag that carry the same dependency id, quality id,
+// temporal id, frame_num and POC lsb but a different first MB, and move the index to the
+// earliest such unit. Units without the flag are skipped; the first flagged unit that does
+// not match ends the search. For a single-slice picture *pIdxNoInterLayerPred is unchanged.
+void RefineIdxNoInterLayerPred (PAccessUnit pCurAu, int32_t* pIdxNoInterLayerPred) {
+  PNalUnit pRefNal = pCurAu->pNalUnitsList[*pIdxNoInterLayerPred];
+
+  // identifying properties of the slice we start from
+  const int32_t kiRefDependId  = pRefNal->sNalHeaderExt.uiDependencyId;
+  const int32_t kiRefQualityId = pRefNal->sNalHeaderExt.uiQualityId;
+  const uint8_t kuiRefTId      = pRefNal->sNalHeaderExt.uiTemporalId;
+  const int32_t kiRefFrameNum  = pRefNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
+  const int32_t kiRefPoc       = pRefNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+  const int32_t kiRefFirstMb   = pRefNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
+
+  bool bFoundSibling      = false;
+  int32_t iEarliestSibling = 0;
+
+  for (int32_t iIdx = *pIdxNoInterLayerPred - 1; iIdx >= 0; --iIdx) {
+    PNalUnit pNal = pCurAu->pNalUnitsList[iIdx];
+    if (!pNal->sNalHeaderExt.iNoInterLayerPredFlag)
+      continue; // only flagged units take part in the comparison
+
+    const int32_t iDependId  = pNal->sNalHeaderExt.uiDependencyId;
+    const int32_t iQualityId = pNal->sNalHeaderExt.uiQualityId;
+    const int32_t iTId       = pNal->sNalHeaderExt.uiTemporalId;
+    const int32_t iFrameNum  = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFrameNum;
+    const int32_t iPoc       = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+    const int32_t iFirstMb   = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
+
+    const bool kbSiblingSlice = (iDependId  == kiRefDependId)  &&
+                                (iQualityId == kiRefQualityId) &&
+                                (iTId       == kuiRefTId)      &&
+                                (iFrameNum  == kiRefFrameNum)  &&
+                                (iPoc       == kiRefPoc)       &&
+                                (iFirstMb   != kiRefFirstMb);
+    if (!kbSiblingSlice)
+      break;
+
+    bFoundSibling    = true;
+    iEarliestSibling = iIdx;
+  }
+
+  if (bFoundSibling) {
+    *pIdxNoInterLayerPred = iEarliestSibling;
+  }
+}
+
+/*!
+ * \brief   Check that the NAL units following pIdxNoInterLayerPred carry the same POC lsb
+ *          as the unit at pIdxNoInterLayerPred.
+ * \param   pCurAu                current access unit
+ * \param   pIdxNoInterLayerPred  index of the reference NAL unit
+ * \return  true if no mismatch was found, false on the first differing POC lsb
+ * \note    NOTE(review): the scan covers indices (pIdxNoInterLayerPred, uiEndPos) exclusive
+ *          on both sides, so the unit at uiEndPos itself is not compared - confirm this is intended.
+ */
+bool CheckPocOfCurValidNalUnits (PAccessUnit pCurAu, int32_t pIdxNoInterLayerPred) {
+  const int32_t kiStopIdx = pCurAu->uiEndPos;
+  const int32_t kiRefPoc  =
+    pCurAu->pNalUnitsList[pIdxNoInterLayerPred]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+
+  int32_t iIdx = pIdxNoInterLayerPred + 1;
+  while (iIdx < kiStopIdx) {
+    const int32_t kiPoc = pCurAu->pNalUnitsList[iIdx]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iPicOrderCntLsb;
+    if (kiPoc != kiRefPoc)
+      return false;
+    ++iIdx;
+  }
+
+  return true;
+}
+
+/*!
+ * \brief   Validate the NAL unit list of the current access unit and determine the range
+ *          [uiStartPos, uiEndPos] of units to process.
+ * \param   pCtx  decoder context owning the access unit list
+ * \return  true if a usable range was found; false if the access unit is not completed,
+ *          no unit with iNoInterLayerPredFlag set can be found, or the POC check fails.
+ * \note    Side effects: updates pCurAu->uiStartPos, and (through
+ *          CheckAvailNalUnitsListContinuity) pCurAu->uiEndPos and pCtx->uiTargetDqId. When
+ *          pCtx->bNewSeqBegin is set it also records the target dependency id and the
+ *          maximum picture size of the sequence interval.
+ */
+bool CheckIntegrityNalUnitsList (PWelsDecoderContext pCtx) {
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  const int32_t kiEndPos = pCurAu->uiEndPos; // end position as it was on entry; uiEndPos may be rewritten below
+  int32_t iIdxNoInterLayerPred = 0;
+
+  if (!pCurAu->bCompletedAuFlag)
+    return false;
+
+  if (pCtx->bNewSeqBegin) {
+    pCurAu->uiStartPos = 0;
+    //step 1: search backwards (from uiEndPos to 0) for the NAL unit whose iNoInterLayerPredFlag equals 1
+    iIdxNoInterLayerPred = kiEndPos;
+    while (iIdxNoInterLayerPred >= 0) {
+      if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+        break;
+      }
+      --iIdxNoInterLayerPred;
+    }
+    if (iIdxNoInterLayerPred < 0) {
+      //cannot find a NAL unit whose iNoInterLayerPredFlag equals 1, decoding MUST stop
+      return false;
+    }
+
+    //step 2: support multi-slice, include all base layer slices
+    RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+    pCurAu->uiStartPos = iIdxNoInterLayerPred;
+    CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
+
+    if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+      return false;
+    }
+
+    // remember the target layer and picture size (MB counts << 4, i.e. in pixels) taken
+    // from the unit at the refined uiEndPos
+    pCtx->iCurSeqIntervalTargetDependId = pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalHeaderExt.uiDependencyId;
+    pCtx->iCurSeqIntervalMaxPicWidth  =
+      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbWidth << 4;
+    pCtx->iCurSeqIntervalMaxPicHeight =
+      pCurAu->pNalUnitsList[pCurAu->uiEndPos]->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.iMbHeight << 4;
+  } else { //not the start of a new sequence
+    //step 1: search backwards for a unit whose uiDependencyId equals pCtx->iCurSeqIntervalTargetDependId
+    bool bGetDependId = false;
+    int32_t iIdxDependId = 0;
+
+    iIdxDependId = kiEndPos;
+    while (iIdxDependId >= 0) {
+      if (pCtx->iCurSeqIntervalTargetDependId == pCurAu->pNalUnitsList[iIdxDependId]->sNalHeaderExt.uiDependencyId) {
+        bGetDependId = true;
+        break;
+      } else {
+        --iIdxDependId;
+      }
+    }
+
+    //step 2: branch on whether a unit with uiDependencyId equal to iCurSeqIntervalTargetDependId was found
+    if (bGetDependId) { //found: iIdxDependId is the index of that unit
+      bool bGetNoInterPredFront = false;
+      //step 2a: search for iNoInterLayerPredFlag among [0....iIdxDependId], backwards
+      iIdxNoInterLayerPred = iIdxDependId;
+      while (iIdxNoInterLayerPred >= 0) {
+        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+          bGetNoInterPredFront = true;
+          break;
+        }
+        --iIdxNoInterLayerPred;
+      }
+      //step 2b: branch on whether a unit with iNoInterLayerPredFlag equal to 1 exists among [0....iIdxDependId]
+      if (bGetNoInterPredFront) { //YES: the chain is checked only up to iIdxDependId
+        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+        pCurAu->uiStartPos = iIdxNoInterLayerPred;
+        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, iIdxDependId);
+
+        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+          return false;
+        }
+      } else { //NO: look for the unit with iNoInterLayerPredFlag equal to 1 among [iIdxDependId....uiEndPos]
+        iIdxNoInterLayerPred = iIdxDependId;
+        while (iIdxNoInterLayerPred <= kiEndPos) {
+          if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+            break;
+          }
+          ++iIdxNoInterLayerPred;
+        }
+
+        if (iIdxNoInterLayerPred > kiEndPos) {
+          return false; //cannot find a unit whose iNoInterLayerPredFlag equals 1
+        }
+
+        RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+        pCurAu->uiStartPos = iIdxNoInterLayerPred;
+        CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
+
+        if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+          return false;
+        }
+      }
+    } else { //no unit of the target dependency layer: handle this AU like the common case
+      iIdxNoInterLayerPred = kiEndPos;
+      while (iIdxNoInterLayerPred >= 0) {
+        if (pCurAu->pNalUnitsList[iIdxNoInterLayerPred]->sNalHeaderExt.iNoInterLayerPredFlag) {
+          break;
+        }
+        --iIdxNoInterLayerPred;
+      }
+      if (iIdxNoInterLayerPred < 0) {
+        return false; //cannot find a unit whose iNoInterLayerPredFlag equals 1
+      }
+
+      RefineIdxNoInterLayerPred (pCurAu, &iIdxNoInterLayerPred);
+      pCurAu->uiStartPos = iIdxNoInterLayerPred;
+      CheckAvailNalUnitsListContinuity (pCtx, iIdxNoInterLayerPred, kiEndPos);
+
+      if (!CheckPocOfCurValidNalUnits (pCurAu, iIdxNoInterLayerPred)) {
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+/*!
+ * \brief   Set pCtx->bOnlyOneLayerInCurAuFlag: true when every NAL unit in
+ *          [uiStartPos, uiEndPos] has the same dependency, quality and temporal id as the
+ *          unit at uiStartPos, false otherwise.
+ * \param   pCtx  decoder context owning the access unit list
+ */
+void CheckOnlyOneLayerInAu (PWelsDecoderContext pCtx) {
+  PAccessUnit pAu = pCtx->pAccessUnitList;
+
+  const int32_t kiLastIdx  = pAu->uiEndPos;
+  const int32_t kiFirstIdx = pAu->uiStartPos;
+
+  // layer identity of the first unit, the reference for all others
+  const uint8_t kuiRefDId = pAu->pNalUnitsList[kiFirstIdx]->sNalHeaderExt.uiDependencyId;
+  const uint8_t kuiRefQId = pAu->pNalUnitsList[kiFirstIdx]->sNalHeaderExt.uiQualityId;
+  const uint8_t kuiRefTId = pAu->pNalUnitsList[kiFirstIdx]->sNalHeaderExt.uiTemporalId;
+
+  pCtx->bOnlyOneLayerInCurAuFlag = true;
+
+  // with a single unit in the range the loop body never runs and the flag stays true
+  for (int32_t iIdx = kiFirstIdx + 1; iIdx <= kiLastIdx; ++iIdx) {
+    const uint8_t kuiDId = pAu->pNalUnitsList[iIdx]->sNalHeaderExt.uiDependencyId;
+    const uint8_t kuiQId = pAu->pNalUnitsList[iIdx]->sNalHeaderExt.uiQualityId;
+    const uint8_t kuiTId = pAu->pNalUnitsList[iIdx]->sNalHeaderExt.uiTemporalId;
+
+    const bool kbSameLayer = (kuiRefDId == kuiDId) && (kuiRefQId == kuiQId) && (kuiRefTId == kuiTId);
+    if (!kbSameLayer) {
+      pCtx->bOnlyOneLayerInCurAuFlag = false;
+      return;
+    }
+  }
+}
+
+/*!
+ * \brief   Prepare the current access unit for decoding.
+ * \param   pCtx  decoder context
+ * \return  ERR_NONE on success; the error returned by UpdateAccessUnit(); or dsBitstreamError
+ *          (also OR-ed into pCtx->iErrorCode) when the NAL list fails the integrity check.
+ * \note    The integrity check and the single-layer check only run when
+ *          pCtx->bAvcBasedFlag is not set.
+ */
+int32_t WelsDecodeAccessUnitStart (PWelsDecoderContext pCtx) {
+  // roll back NAL units that do not belong to the current access unit
+  const int32_t kiUpdateRet = UpdateAccessUnit (pCtx);
+  if (ERR_NONE != kiUpdateRet)
+    return kiUpdateRet;
+
+  pCtx->pAccessUnitList->uiStartPos = 0;
+
+  if (pCtx->bAvcBasedFlag)
+    return ERR_NONE; // the layer checks below are skipped in this case
+
+  if (!CheckIntegrityNalUnitsList (pCtx)) {
+    pCtx->iErrorCode |= dsBitstreamError;
+    return dsBitstreamError;
+  }
+
+  // find out whether the AU holds a single layer; if so AVC-style deblocking can be used
+  CheckOnlyOneLayerInAu (pCtx);
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Finish the current access unit: keep a copy of the NAL header extension and
+ *          slice header of the unit at uiEndPos in pCtx, then reset the access unit.
+ * \param   pCtx  decoder context
+ */
+void WelsDecodeAccessUnitEnd (PWelsDecoderContext pCtx) {
+  PAccessUnit pAu  = pCtx->pAccessUnitList;
+  PNalUnit pEndNal = pAu->pNalUnitsList[pAu->uiEndPos];
+
+  // save the header info of the last unit for use with the next access unit
+  memcpy (&pCtx->sLastNalHdrExt, &pEndNal->sNalHeaderExt, sizeof (SNalUnitHeaderExt));
+  memcpy (&pCtx->sLastSliceHeader, &pEndNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader,
+          sizeof (SSliceHeader));
+
+  // reset the access unit state, keeping any NAL units of the following access unit
+  ResetCurrentAccessUnit (pCtx);
+}
+
+/* CheckNewSeqBeginAndUpdateActiveLayerSps
+ * Collects the SPS referenced by each dependency layer of the current access unit and
+ * compares the highest layer with pCtx->pActiveLayerSps.
+ * A new sequence is reported when the AU contains an IDR NAL unit (type or bIdrFlag), or
+ * when the highest layer present, or its SPS pointer, differs from the active one.
+ * Side effect: on a new sequence pCtx->pActiveLayerSps is replaced by the SPS set of this
+ * AU; otherwise only active entries that are still NULL are filled in.
+ * return:
+ * true - the AU to be constructed is the start of a new sequence; false - not
+ */
+static bool CheckNewSeqBeginAndUpdateActiveLayerSps (PWelsDecoderContext pCtx) {
+  bool bNewSeq = false;
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+  PSps pTmpLayerSps[MAX_LAYER_NUM];
+  for (int i = 0; i < MAX_LAYER_NUM; i++) {
+    pTmpLayerSps[i] = NULL;
+  }
+  // track the layer sps for the current au
+  for (unsigned int i = pCurAu->uiStartPos; i <= pCurAu->uiEndPos; i++) {
+    PNalUnit pNal = pCurAu->pNalUnitsList[i];
+    const uint32_t kuiDid = pNal->sNalHeaderExt.uiDependencyId;
+    // the dependency id comes from the NAL header; never let it index past the local table
+    if (kuiDid < (uint32_t)MAX_LAYER_NUM) {
+      pTmpLayerSps[kuiDid] = pNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
+    }
+    if ((pNal->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR)
+        || (pNal->sNalHeaderExt.bIdrFlag))
+      bNewSeq = true;
+  }
+  // highest layer index with an SPS, for the active set and for this AU (0 if none)
+  int iMaxActiveLayer = 0, iMaxCurrentLayer = 0;
+  for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
+    if (pCtx->pActiveLayerSps[i] != NULL) {
+      iMaxActiveLayer = i;
+      break;
+    }
+  }
+  for (int i = MAX_LAYER_NUM - 1; i >= 0; i--) {
+    if (pTmpLayerSps[i] != NULL) {
+      iMaxCurrentLayer = i;
+      break;
+    }
+  }
+  if ((iMaxCurrentLayer != iMaxActiveLayer)
+      || (pTmpLayerSps[iMaxCurrentLayer]  != pCtx->pActiveLayerSps[iMaxActiveLayer])) {
+    bNewSeq = true;
+  }
+  if (!bNewSeq) {
+    // same sequence: only fill active entries that are still empty
+    for (int i = 0; i < MAX_LAYER_NUM; i++) {
+      if (pCtx->pActiveLayerSps[i] == NULL && pTmpLayerSps[i] != NULL) {
+        pCtx->pActiveLayerSps[i] = pTmpLayerSps[i];
+      }
+    }
+  } else {
+    // new sequence: the SPS set of this AU becomes the active one
+    memcpy (&pCtx->pActiveLayerSps[0], &pTmpLayerSps[0], MAX_LAYER_NUM * sizeof (PSps));
+  }
+  return bNewSeq;
+}
+
+/*
+ * WriteBackActiveParameters
+ * For each OVERWRITE_* bit set in pCtx->iOverwriteFlags, copy the parameter set held in the
+ * extra slot at the end of the corresponding buffer (index MAX_PPS_COUNT / MAX_SPS_COUNT)
+ * into the slot selected by its own id. Overwriting an SPS or subset SPS also sets
+ * pCtx->bNewSeqBegin. The flags are cleared afterwards.
+ */
+static void WriteBackActiveParameters (PWelsDecoderContext pCtx) {
+  const bool kbPps       = (pCtx->iOverwriteFlags & OVERWRITE_PPS) != 0;
+  const bool kbSps       = (pCtx->iOverwriteFlags & OVERWRITE_SPS) != 0;
+  const bool kbSubsetSps = (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS) != 0;
+
+  if (kbPps) {
+    memcpy (&pCtx->sPpsBuffer[pCtx->sPpsBuffer[MAX_PPS_COUNT].iPpsId],
+            &pCtx->sPpsBuffer[MAX_PPS_COUNT],
+            sizeof (SPps));
+  }
+
+  if (kbSps) {
+    memcpy (&pCtx->sSpsBuffer[pCtx->sSpsBuffer[MAX_SPS_COUNT].iSpsId],
+            &pCtx->sSpsBuffer[MAX_SPS_COUNT],
+            sizeof (SSps));
+  }
+
+  if (kbSubsetSps) {
+    memcpy (&pCtx->sSubsetSpsBuffer[pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT].sSps.iSpsId],
+            &pCtx->sSubsetSpsBuffer[MAX_SPS_COUNT],
+            sizeof (SSubsetSps));
+  }
+
+  // a replaced SPS or subset SPS starts a new sequence
+  if (kbSps || kbSubsetSps) {
+    pCtx->bNewSeqBegin = true;
+  }
+
+  pCtx->iOverwriteFlags = OVERWRITE_NONE;
+}
+
+/*
+ * DecodeFinishUpdate
+ * Called when decoding has finished: writes back pending parameter sets and recomputes
+ * the new-sequence status. bNewSeqBegin ends up set if a (subset) SPS was written back or
+ * if bNextNewSeqBegin was pending; bNextNewSeqBegin is always cleared. When a new sequence
+ * begins, the active SPS of each layer is reset.
+ */
+
+void DecodeFinishUpdate (PWelsDecoderContext pCtx) {
+  pCtx->bNewSeqBegin = false;
+  WriteBackActiveParameters (pCtx); // may set bNewSeqBegin again
+
+  if (pCtx->bNextNewSeqBegin) {
+    pCtx->bNewSeqBegin = true;
+  }
+  pCtx->bNextNewSeqBegin = false; // consumed
+
+  if (pCtx->bNewSeqBegin) {
+    ResetActiveSPSForEachLayer (pCtx);
+  }
+}
+
+/*
+ * ConstructAccessUnit
+ * Validate the NAL units collected for the current access unit and decode it.
+ * parameter\
+ *  pCtx:       decoder context
+ *  ppDst:      passed through to DecodeCurrentAccessUnit()
+ *  pDstInfo:   output buffer info; iBufferStatus is set to 0 when the access unit is
+ *              rejected and the decoder is not in parse-only mode
+ * return:
+ *  ERR_NONE on success; otherwise the error returned by WelsDecodeAccessUnitStart(),
+ *  SyncPictureResolutionExt() or DecodeCurrentAccessUnit()
+ * NOTE(review): GetVclNalTemporalId() runs before the start error is checked, and a
+ *  SyncPictureResolutionExt() failure returns without resetting the access unit - confirm
+ *  both are intended.
+ */
+int32_t ConstructAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+
+  pCtx->bAuReadyFlag  = false;
+  pCtx->bLastHasMmco5 = false;
+
+  // a new sequence detected here adds to, but never clears, an already pending one
+  if (CheckNewSeqBeginAndUpdateActiveLayerSps (pCtx)) {
+    pCtx->bNewSeqBegin = true;
+  }
+
+  int32_t iErr = WelsDecodeAccessUnitStart (pCtx);
+  GetVclNalTemporalId (pCtx);
+
+  if (iErr != ERR_NONE) {
+    // drop the rejected access unit and report that no picture is available
+    ForceResetCurrentAccessUnit (pCtx->pAccessUnitList);
+    if (!pCtx->pParam->bParseOnly) {
+      pDstInfo->iBufferStatus = 0;
+    }
+    if (pCtx->bNextNewSeqBegin) {
+      pCtx->bNewSeqBegin = true;
+    }
+    pCtx->bNextNewSeqBegin = false; // consumed
+    if (pCtx->bNewSeqBegin) {
+      ResetActiveSPSForEachLayer (pCtx);
+    }
+    return iErr;
+  }
+
+  // the parameter sets of the first unit in range become the current ones
+  PNalUnit pStartNal = pCurAu->pNalUnitsList[pCurAu->uiStartPos];
+  pCtx->pSps = pStartNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pSps;
+  pCtx->pPps = pStartNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader.pPps;
+
+  // reference pictures are cleared and picture memory re-synchronised only at a new sequence
+  if (pCtx->bNewSeqBegin) {
+    WelsResetRefPic (pCtx);
+    iErr = SyncPictureResolutionExt (pCtx, pCtx->pSps->iMbWidth, pCtx->pSps->iMbHeight);
+
+    if (iErr != ERR_NONE) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "sync picture resolution ext failed,  the error is %d", iErr);
+      return iErr;
+    }
+  }
+
+  iErr = DecodeCurrentAccessUnit (pCtx, ppDst, pDstInfo);
+
+  // runs whether or not decoding succeeded
+  WelsDecodeAccessUnitEnd (pCtx);
+
+  if (iErr != ERR_NONE) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "returned error from decoding:[0x%x]", iErr);
+  }
+
+  return iErr;
+}
+
+/*
+ * InitDqLayerInfo
+ * Fill pDqLayer from the layer info, the NAL unit headers/slice headers and the picture to decode into.
+ * Reference list reordering, marking and prediction weight pointers are only set for NAL units
+ * whose quality id equals BASE_QUALITY_ID.
+ */
+static inline void InitDqLayerInfo (PDqLayer pDqLayer, PLayerInfo pLayerInfo, PNalUnit pNalUnit, PPicture pPicDec) {
+  PNalUnitHeaderExt pHdrExt   = &pNalUnit->sNalHeaderExt;
+  PSliceHeaderExt pSliceExt   = &pNalUnit->sNalData.sVclNal.sSliceHeaderExt;
+  PSliceHeader pSliceHdr      = &pSliceExt->sSliceHeader;
+  const uint8_t kuiQId        = pHdrExt->uiQualityId;
+
+  memcpy (&pDqLayer->sLayerInfo, pLayerInfo, sizeof (SLayerInfo)); //confirmed_safe_unsafe_usage
+
+  /* picture and geometry */
+  pDqLayer->pDec      = pPicDec;
+  pDqLayer->iMbWidth  = pSliceHdr->iMbWidth;
+  pDqLayer->iMbHeight = pSliceHdr->iMbHeight;
+
+  /* values taken from the NAL unit header extension */
+  pDqLayer->uiLayerDqId        = pHdrExt->uiLayerDqId;
+  pDqLayer->bUseRefBasePicFlag = pHdrExt->bUseRefBasePicFlag;
+  pDqLayer->iSliceIdcBackup    = (pSliceHdr->iFirstMbInSlice << 7) | (pHdrExt->uiDependencyId << 4) |
+                                 (pHdrExt->uiQualityId);
+
+  /* common syntax elements across all slices of a DQLayer */
+  pDqLayer->uiPpsId                = pLayerInfo->pPps->iPpsId;
+  pDqLayer->iSliceGroupChangeCycle = pSliceHdr->iSliceGroupChangeCycle;
+
+  pDqLayer->uiDisableInterLayerDeblockingFilterIdc = pSliceExt->uiDisableInterLayerDeblockingFilterIdc;
+  pDqLayer->iInterLayerSliceAlphaC0Offset          = pSliceExt->iInterLayerSliceAlphaC0Offset;
+  pDqLayer->iInterLayerSliceBetaOffset             = pSliceExt->iInterLayerSliceBetaOffset;
+  pDqLayer->bStoreRefBasePicFlag                   = pSliceExt->bStoreRefBasePicFlag;
+  pDqLayer->bTCoeffLevelPredFlag                   = pSliceExt->bTCoeffLevelPredFlag;
+  pDqLayer->bConstrainedIntraResamplingFlag        = pSliceExt->bConstrainedIntraResamplingFlag;
+  pDqLayer->uiRefLayerDqId                         = pSliceExt->uiRefLayerDqId;
+  pDqLayer->uiRefLayerChromaPhaseXPlus1Flag        = pSliceExt->uiRefLayerChromaPhaseXPlus1Flag;
+  pDqLayer->uiRefLayerChromaPhaseYPlus1            = pSliceExt->uiRefLayerChromaPhaseYPlus1;
+
+  if (kuiQId != BASE_QUALITY_ID) {
+    /* weighted prediction is switched off for non-base quality NAL units */
+    pDqLayer->bUseWeightPredictionFlag = false;
+    pDqLayer->bUseWeightedBiPredIdc    = false;
+    return;
+  }
+
+  /* base quality: hook up reference handling and the weighted prediction settings from the PPS */
+  PPps pSlicePps = pSliceHdr->pPps;
+  pDqLayer->pRefPicListReordering    = &pSliceHdr->pRefPicListReordering;
+  pDqLayer->pRefPicMarking           = &pSliceHdr->sRefMarking;
+  pDqLayer->pRefPicBaseMarking       = &pSliceExt->sRefBasePicMarking;
+  pDqLayer->bUseWeightPredictionFlag = pSlicePps->bWeightedPredFlag;
+  pDqLayer->bUseWeightedBiPredIdc    = pSlicePps->uiWeightedBipredIdc != 0;
+  if (pSlicePps->bWeightedPredFlag || pSlicePps->uiWeightedBipredIdc)
+    pDqLayer->pPredWeightTable = &pSliceHdr->sPredWeightTable;
+}
+
+/*
+ * WelsDqLayerDecodeStart
+ * Record the slice header of pCurNal in the decoder context (slice type, frame_num, header pointer),
+ * clear bUsedAsRef and update the decoder statistics with the given SPS/PPS.
+ */
+void WelsDqLayerDecodeStart (PWelsDecoderContext pCtx, PNalUnit pCurNal, PSps pSps, PPps pPps) {
+  PSliceHeader pSliceHdr = &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+
+  pCtx->pSliceHeader = pSliceHdr;
+  pCtx->eSliceType   = pSliceHdr->eSliceType;
+  pCtx->iFrameNum    = pSliceHdr->iFrameNum;
+  pCtx->bUsedAsRef   = false;
+
+  UpdateDecoderStatisticsForActiveParaset (&pCtx->sDecoderStatistics, pSps, pPps);
+}
+
+/*
+ * InitRefPicList
+ * Build the reference picture list(s) for the current slice and, for every slice type other than
+ * I and SI, apply reference list reordering.
+ * kuiNRi is not used by this function.
+ * return:
+ *  for I/SI slices the result of the list initialisation; otherwise the result of
+ *  WelsReorderRefList() (the initialisation result is overwritten in that case)
+ */
+int32_t InitRefPicList (PWelsDecoderContext pCtx, const uint8_t kuiNRi, int32_t iPoc) {
+  int32_t iInitRet = ERR_NONE;
+
+  if (pCtx->eSliceType != B_SLICE) {
+    iInitRet = WelsInitRefList (pCtx, iPoc);
+  } else {
+    // B slices need both lists plus the implicit weight table
+    iInitRet = WelsInitBSliceRefList (pCtx, iPoc);
+    CreateImplicitWeightTable (pCtx);
+  }
+
+  // intra-only slices carry no reordering commands
+  if (pCtx->eSliceType == I_SLICE || pCtx->eSliceType == SI_SLICE)
+    return iInitRet;
+
+#if 0
+  if (pCtx->pSps->uiProfileIdc != 66 && pCtx->pPps->bEntropyCodingModeFlag)
+    return WelsReorderRefList2 (pCtx);
+#endif
+  return WelsReorderRefList (pCtx);
+}
+
+/*
+ * InitCurDqLayerData
+ * Point the per-macroblock buffers of pCurDq at entry [0] of the corresponding pCtx->sMb arrays.
+ * Does nothing when either argument is NULL.
+ */
+void InitCurDqLayerData (PWelsDecoderContext pCtx, PDqLayer pCurDq) {
+  if (NULL == pCtx || NULL == pCurDq)
+    return;
+
+  /* macroblock type / slice membership */
+  pCurDq->pMbType    = pCtx->sMb.pMbType[0];
+  pCurDq->pSubMbType = pCtx->sMb.pSubMbType[0];
+  pCurDq->pSliceIdc  = pCtx->sMb.pSliceIdc[0];
+
+  /* motion data for both reference lists */
+  pCurDq->pMv[LIST_0]       = pCtx->sMb.pMv[0][LIST_0];
+  pCurDq->pMv[LIST_1]       = pCtx->sMb.pMv[0][LIST_1];
+  pCurDq->pMvd[LIST_0]      = pCtx->sMb.pMvd[0][LIST_0];
+  pCurDq->pMvd[LIST_1]      = pCtx->sMb.pMvd[0][LIST_1];
+  pCurDq->pRefIndex[LIST_0] = pCtx->sMb.pRefIndex[0][LIST_0];
+  pCurDq->pRefIndex[LIST_1] = pCtx->sMb.pRefIndex[0][LIST_1];
+  pCurDq->pDirect           = pCtx->sMb.pDirect[0];
+
+  /* partition / transform flags */
+  pCurDq->pNoSubMbPartSizeLessThan8x8Flag = pCtx->sMb.pNoSubMbPartSizeLessThan8x8Flag[0];
+  pCurDq->pTransformSize8x8Flag           = pCtx->sMb.pTransformSize8x8Flag[0];
+
+  /* quantisation and residual data */
+  pCurDq->pLumaQp       = pCtx->sMb.pLumaQp[0];
+  pCurDq->pChromaQp     = pCtx->sMb.pChromaQp[0];
+  pCurDq->pCbfDc        = pCtx->sMb.pCbfDc[0];
+  pCurDq->pCbp          = pCtx->sMb.pCbp[0];
+  pCurDq->pNzc          = pCtx->sMb.pNzc[0];
+  pCurDq->pNzcRs        = pCtx->sMb.pNzcRs[0];
+  pCurDq->pScaledTCoeff = pCtx->sMb.pScaledTCoeff[0];
+
+  /* intra prediction */
+  pCurDq->pIntraPredMode     = pCtx->sMb.pIntraPredMode[0];
+  pCurDq->pIntra4x4FinalMode = pCtx->sMb.pIntra4x4FinalMode[0];
+  pCurDq->pIntraNxNAvailFlag = pCtx->sMb.pIntraNxNAvailFlag[0];
+  pCurDq->pChromaPredMode    = pCtx->sMb.pChromaPredMode[0];
+
+  /* per-macroblock status flags */
+  pCurDq->pInterPredictionDoneFlag = pCtx->sMb.pInterPredictionDoneFlag[0];
+  pCurDq->pResidualPredFlag        = pCtx->sMb.pResidualPredFlag[0];
+  pCurDq->pMbCorrectlyDecodedFlag  = pCtx->sMb.pMbCorrectlyDecodedFlag[0];
+  pCurDq->pMbRefConcealedFlag      = pCtx->sMb.pMbRefConcealedFlag[0];
+}
+
+/*
+ * DecodeCurrentAccessUnit
+ * Decode current access unit when current AU is completed.
+ * Walks pAccessUnitList->pNalUnitsList[uiStartPos..uiEndPos]: each pass of the outer loop handles one
+ * dependency/quality (DQ) layer, the inner loop decodes the consecutive slices that share the same
+ * uiDependencyId and uiQualityId. When the decoded layer is the target DQ layer the frame is
+ * constructed (after optional error concealment), marked as reference if nal_ref_idc > 0, and
+ * pCtx->pDec is released (set to NULL).
+ * return:
+ *  ERR_NONE on success; otherwise the error code of the step that failed
+ */
+int32_t DecodeCurrentAccessUnit (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  int32_t iRefCount[LIST_A]; // only written (ST64 below); not read anywhere in this function
+  PNalUnit pNalCur = NULL;
+  PAccessUnit pCurAu = pCtx->pAccessUnitList;
+
+  int32_t iIdx = pCurAu->uiStartPos;
+  int32_t iEndIdx = pCurAu->uiEndPos;
+
+  int32_t iPpsId = 0;
+  int32_t iRet = ERR_NONE;
+
+  bool bAllRefComplete = true; // Assume by default that all reference pictures are complete
+
+  const uint8_t kuiTargetLayerDqId = GetTargetDqId (pCtx->uiTargetDqId, pCtx->pParam);
+  const uint8_t kuiDependencyIdMax = (kuiTargetLayerDqId & 0x7F) >> 4;
+  int16_t iLastIdD = -1, iLastIdQ = -1;
+  int16_t iCurrIdD = 0, iCurrIdQ = 0;
+  uint8_t uiNalRefIdc = 0;
+  bool bFreshSliceAvailable =
+    true; // Another fresh slice coming up for given dq layer, for multiple slices in case of header parts of slices sometimes loss over error-prone channels, 8/14/2008
+
+  //update pCurDqLayer at the starting of AU decoding
+  if (pCtx->bInitialDqLayersMem) {
+    pCtx->pCurDqLayer = pCtx->pDqLayersList[0];
+  }
+
+  InitCurDqLayerData (pCtx, pCtx->pCurDqLayer);
+
+  pNalCur = pCurAu->pNalUnitsList[iIdx];
+  while (iIdx <= iEndIdx) { // one pass per DQ layer
+    PDqLayer dq_cur = pCtx->pCurDqLayer;
+    SLayerInfo pLayerInfo;
+    PSliceHeaderExt pShExt = NULL;
+    PSliceHeader pSh = NULL;
+
+    if (pCtx->pDec == NULL) { // no picture in progress: fetch a free one from the picture buffer
+      pCtx->pDec = PrefetchPic (pCtx->pPicBuff);
+      if (pCtx->iTotalNumMbRec != 0)
+        pCtx->iTotalNumMbRec = 0;
+
+      if (NULL == pCtx->pDec) {
+        WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
+                 "DecodeCurrentAccessUnit()::::::PrefetchPic ERROR, pSps->iNumRefFrames:%d.",
+                 pCtx->pSps->iNumRefFrames);
+        // The error code here need to be separated from the dsOutOfMemory
+        pCtx->iErrorCode |= dsOutOfMemory;
+        return ERR_INFO_REF_COUNT_OVERFLOW;
+      }
+      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
+    } else if (pCtx->iTotalNumMbRec == 0) { //pDec != NULL, already start
+      pCtx->pDec->bNewSeqBegin = pCtx->bNewSeqBegin; //set flag for start decoding
+    }
+    pCtx->pDec->uiTimeStamp = pNalCur->uiTimeStamp;
+
+    if (pCtx->iTotalNumMbRec == 0) { //Picture start to decode
+      for (int32_t i = 0; i < LAYER_NUM_EXCHANGEABLE; ++ i)
+        memset (pCtx->sMb.pSliceIdc[i], 0xff, (pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int32_t)));
+      memset (pCtx->pCurDqLayer->pMbCorrectlyDecodedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
+      memset (pCtx->pCurDqLayer->pMbRefConcealedFlag, 0, pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight * sizeof (bool));
+      pCtx->pDec->iMbNum = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
+      pCtx->pDec->iMbEcedNum = 0;
+      pCtx->pDec->iMbEcedPropNum = 0;
+    }
+    pCtx->bRPLRError = false;
+    GetI4LumaIChromaAddrTable (pCtx->iDecBlockOffsetArray, pCtx->pDec->iLinesize[0], pCtx->pDec->iLinesize[1]);
+
+    if (pNalCur->sNalHeaderExt.uiLayerDqId > kuiTargetLayerDqId) { // confirmed pNalCur will never be NULL
+      break; // For performance: NAL units above the requested uiLayerDqId need not be decoded, 9/2/2009
+    }
+
+    memset (&pLayerInfo, 0, sizeof (SLayerInfo));
+
+    /*
+     *  Loop decoding for slices (even FMO and/ multiple slices) within a dq layer
+     */
+    while (iIdx <= iEndIdx) {
+      bool         bReconstructSlice;
+      iCurrIdQ  = pNalCur->sNalHeaderExt.uiQualityId;
+      iCurrIdD  = pNalCur->sNalHeaderExt.uiDependencyId;
+      pSh       = &pNalCur->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader;
+      pShExt    = &pNalCur->sNalData.sVclNal.sSliceHeaderExt;
+      pCtx->bRPLRError = false;
+      bReconstructSlice = CheckSliceNeedReconstruct (pNalCur->sNalHeaderExt.uiLayerDqId, kuiTargetLayerDqId);
+
+      memcpy (&pLayerInfo.sNalHeaderExt, &pNalCur->sNalHeaderExt, sizeof (SNalUnitHeaderExt)); //confirmed_safe_unsafe_usage
+
+      pCtx->pDec->iFrameNum = pSh->iFrameNum;
+      pCtx->pDec->iFramePoc = pSh->iPicOrderCntLsb; // still can not obtain correct, because current do not support POCtype 2
+      pCtx->pDec->bIdrFlag = pNalCur->sNalHeaderExt.bIdrFlag;
+
+      memcpy (&pLayerInfo.sSliceInLayer.sSliceHeaderExt, pShExt, sizeof (SSliceHeaderExt)); //confirmed_safe_unsafe_usage
+      pLayerInfo.sSliceInLayer.bSliceHeaderExtFlag      = pNalCur->sNalData.sVclNal.bSliceHeaderExtFlag;
+      pLayerInfo.sSliceInLayer.eSliceType               = pSh->eSliceType;
+      pLayerInfo.sSliceInLayer.iLastMbQp                = pSh->iSliceQp;
+      dq_cur->pBitStringAux = &pNalCur->sNalData.sVclNal.sSliceBitsRead;
+
+      uiNalRefIdc = pNalCur->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc;
+
+      iPpsId = pSh->iPpsId;
+
+      pLayerInfo.pPps = pSh->pPps;
+      pLayerInfo.pSps = pSh->pSps;
+      pLayerInfo.pSubsetSps = pShExt->pSubsetSps;
+
+      pCtx->pFmo = &pCtx->sFmoList[iPpsId]; // FMO state is selected by the PPS id of this slice
+      iRet = FmoParamUpdate (pCtx->pFmo, pLayerInfo.pSps, pLayerInfo.pPps, &pCtx->iActiveFmoNum, pCtx->pMemAlign);
+      if (ERR_NONE != iRet) {
+        if (iRet == ERR_INFO_OUT_OF_MEMORY) {
+          pCtx->iErrorCode |= dsOutOfMemory;
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "DecodeCurrentAccessUnit(), Fmo param alloc failed");
+        } else {
+          pCtx->iErrorCode |= dsBitstreamError;
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "DecodeCurrentAccessUnit(), FmoParamUpdate failed, eSliceType: %d.",
+                   pSh->eSliceType);
+        }
+        return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_FMO_INIT_FAIL);
+      }
+
+      bFreshSliceAvailable = (iCurrIdD != iLastIdD
+                              || iCurrIdQ != iLastIdQ);        // do not need condition of (first_mb == 0) due multiple slices might be disorder
+
+      WelsDqLayerDecodeStart (pCtx, pNalCur, pLayerInfo.pSps, pLayerInfo.pPps);
+
+      if (iCurrIdQ == BASE_QUALITY_ID) {
+        ST64 (iRefCount, LD64 (pLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.uiRefCount));
+      }
+
+      if ((iLastIdD < 0) ||  //case 1: first layer
+          (iLastIdD == iCurrIdD)) { //case 2: same uiDId
+        InitDqLayerInfo (dq_cur, &pLayerInfo, pNalCur, pCtx->pDec);
+
+        if (!dq_cur->sLayerInfo.pSps->bGapsInFrameNumValueAllowedFlag) {
+          const bool kbIdrFlag = dq_cur->sLayerInfo.sNalHeaderExt.bIdrFlag
+                                 || (dq_cur->sLayerInfo.sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR);
+          // Subclause 8.2.5.2 Decoding process for gaps in frame_num
+          if (!kbIdrFlag  &&
+              pSh->iFrameNum != pCtx->iPrevFrameNum &&
+              pSh->iFrameNum != ((pCtx->iPrevFrameNum + 1) & ((1 << dq_cur->sLayerInfo.pSps->uiLog2MaxFrameNum) - 1))) {
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+                     "referencing pictures lost due frame gaps exist, prev_frame_num: %d, curr_frame_num: %d", pCtx->iPrevFrameNum,
+                     pSh->iFrameNum);
+
+            bAllRefComplete = false;
+            pCtx->iErrorCode |= dsRefLost;
+            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+#ifdef LONG_TERM_REF
+              pCtx->bParamSetsLostFlag = true;
+#else
+              pCtx->bReferenceLostAtT0Flag = true;
+#endif
+              return GENERATE_ERROR_NO (ERR_LEVEL_SLICE_HEADER, ERR_INFO_REFERENCE_PIC_LOST);
+            }
+          }
+        }
+
+        if (iCurrIdD == kuiDependencyIdMax && iCurrIdQ == BASE_QUALITY_ID) {
+          iRet = InitRefPicList (pCtx, uiNalRefIdc, pSh->iPicOrderCntLsb);
+          if (iRet) {
+            pCtx->bRPLRError = true;
+            bAllRefComplete = false; // RPLR error, set ref pictures complete flag false
+            HandleReferenceLost (pCtx, pNalCur);
+            WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
+                     "reference picture introduced by this frame is lost during transmission! uiTId: %d",
+                     pNalCur->sNalHeaderExt.uiTemporalId);
+            if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+              if (pCtx->iTotalNumMbRec == 0) // nothing reconstructed yet: give the picture up
+                pCtx->pDec = NULL;
+              return iRet;
+            }
+          }
+        }
+
+        iRet = WelsDecodeSlice (pCtx, bFreshSliceAvailable, pNalCur);
+
+        //Output good store_base reconstruction when enhancement quality layer occurred error for MGS key picture case
+        if (iRet != ERR_NONE) {
+          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING,
+                   "DecodeCurrentAccessUnit() failed (%d) in frame: %d uiDId: %d uiQId: %d",
+                   iRet, pSh->iFrameNum, iCurrIdD, iCurrIdQ);
+          bAllRefComplete = false;
+          HandleReferenceLostL0 (pCtx, pNalCur);
+          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+            if (pCtx->iTotalNumMbRec == 0) // nothing reconstructed yet: give the picture up
+              pCtx->pDec = NULL;
+            return iRet;
+          }
+        }
+
+        if (bReconstructSlice) {
+          if ((iRet = WelsDecodeConstructSlice (pCtx, pNalCur)) != ERR_NONE) {
+            pCtx->pDec->bIsComplete = false; // reconstruction error, directly set the flag false
+            return iRet;
+          }
+        }
+        if (bAllRefComplete && pCtx->eSliceType != I_SLICE) {
+          if (pCtx->sRefPic.uiRefCount[LIST_0] > 0) {
+            bAllRefComplete &= CheckRefPicturesComplete (pCtx);
+          } else {
+            bAllRefComplete = false; // non-I slice with an empty LIST_0 is treated as incomplete
+          }
+        }
+      }
+#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "cur_frame : %d\tiCurrIdD : %d\n ",
+               dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFrameNum, iCurrIdD);
+#endif//#if !CODEC_FOR_TESTBED
+      iLastIdD = iCurrIdD;
+      iLastIdQ = iCurrIdQ;
+
+      // advance to the next NAL unit; never index pNalUnitsList beyond uiEndPos
+      ++ iIdx;
+      if (iIdx <= iEndIdx) {
+        pNalCur = pCurAu->pNalUnitsList[iIdx];
+      } else {
+        pNalCur = NULL;
+      }
+
+      if (pNalCur == NULL ||
+          iLastIdD != pNalCur->sNalHeaderExt.uiDependencyId ||
+          iLastIdQ != pNalCur->sNalHeaderExt.uiQualityId)
+        break; // end of AU, or the next NAL unit belongs to another DQ layer
+    }
+
+    // Set the current dec picture complete flag. The flag will be reset when current picture need do ErrorCon.
+    pCtx->pDec->bIsComplete = bAllRefComplete;
+    if (!pCtx->pDec->bIsComplete) {  // Ref pictures ECed, result in ECed
+      pCtx->iErrorCode |= dsDataErrorConcealed;
+    }
+
+    // A dq layer decoded here
+#if defined (_DEBUG) &&  !defined (CODEC_FOR_TESTBED)
+#undef fprintf
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG, "POC: #%d, FRAME: #%d, D: %d, Q: %d, T: %d, P: %d, %d\n",
+             pSh->iPicOrderCntLsb, pSh->iFrameNum, iCurrIdD, iCurrIdQ, dq_cur->sLayerInfo.sNalHeaderExt.uiTemporalId,
+             dq_cur->sLayerInfo.sNalHeaderExt.uiPriorityId, dq_cur->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iSliceQp);
+#endif//#if !CODEC_FOR_TESTBED
+
+    if (dq_cur->uiLayerDqId == kuiTargetLayerDqId) { // target layer reached: finish the frame
+      if (!pCtx->bInstantDecFlag) {
+        if (!pCtx->pParam->bParseOnly) {
+          //Do error concealment here
+          if ((NeedErrorCon (pCtx)) && (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE)) {
+            ImplementErrorCon (pCtx);
+            pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
+            pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
+            pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
+          }
+        }
+      }
+
+      iRet = DecodeFrameConstruction (pCtx, ppDst, pDstInfo);
+      if (iRet)
+        return iRet;
+
+      pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //store latest decoded picture for EC
+      pCtx->bUsedAsRef = false;
+      if (uiNalRefIdc > 0) {
+        pCtx->bUsedAsRef = true;
+        //save MBType, MV and RefIndex for use in B-Slice direct mode
+        memcpy (pCtx->pDec->pMbType, pCtx->pCurDqLayer->pMbType, pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (uint32_t));
+        memcpy (pCtx->pDec->pMv[LIST_0], pCtx->pCurDqLayer->pMv[LIST_0],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
+        memcpy (pCtx->pDec->pMv[LIST_1], pCtx->pCurDqLayer->pMv[LIST_1],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int16_t) * MV_A * MB_BLOCK4x4_NUM);
+        memcpy (pCtx->pDec->pRefIndex[LIST_0], pCtx->pCurDqLayer->pRefIndex[LIST_0],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
+        memcpy (pCtx->pDec->pRefIndex[LIST_1], pCtx->pCurDqLayer->pRefIndex[LIST_1],
+                pCtx->sMb.iMbWidth * pCtx->sMb.iMbHeight * sizeof (int8_t) * MB_BLOCK4x4_NUM);
+        for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { // remember the reference lists used by this picture
+          for (uint32_t i = 0; i < pCtx->sRefPic.uiRefCount[listIdx]; ++i) {
+            pCtx->pDec->pRefPic[listIdx][i] = pCtx->sRefPic.pRefList[listIdx][i];
+          }
+        }
+        iRet = WelsMarkAsRef (pCtx);
+        if (iRet != ERR_NONE) {
+          if (iRet == ERR_INFO_DUPLICATE_FRAME_NUM)
+            pCtx->iErrorCode |= dsBitstreamError;
+          if (pCtx->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+            pCtx->pDec = NULL;
+            return iRet;
+          }
+        }
+        if (!pCtx->pParam->bParseOnly)
+          ExpandReferencingPicture (pCtx->pDec->pData, pCtx->pDec->iWidthInPixel, pCtx->pDec->iHeightInPixel,
+                                    pCtx->pDec->iLinesize,
+                                    pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
+      }
+      pCtx->pDec = NULL; //after frame decoding, always set to NULL
+    }
+
+    // need update frame_num due current frame is well decoded
+    if (pCurAu->pNalUnitsList[pCurAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
+      pCtx->iPrevFrameNum = pSh->iFrameNum; // pSh: header of the last slice handled by the inner loop
+    if (pCtx->bLastHasMmco5)
+      pCtx->iPrevFrameNum = 0;
+  }
+
+  return ERR_NONE;
+}
+/*
+ * CheckAndFinishLastPic
+ * Decide whether the current NAL unit marks an access unit boundary and, if the previous picture
+ * is only partly reconstructed (iTotalNumMbRec != 0 and NeedErrorCon()), finish it: either by error
+ * concealment plus frame construction, by clearing the parse-only state, or by plain frame construction.
+ * return:
+ *  false when DecodeFrameConstruction() fails with error concealment disabled; ERR_NONE otherwise.
+ *  NOTE(review): the return type is bool; presumably ERR_NONE is 0, so both paths evaluate to false -
+ *  confirm whether any caller relies on the returned value.
+ */
+bool CheckAndFinishLastPic (PWelsDecoderContext pCtx, uint8_t** ppDst, SBufferInfo* pDstInfo) {
+  PAccessUnit pAu = pCtx->pAccessUnitList;
+  bool bAuBoundaryFlag = false;
+  if (IS_VCL_NAL (pCtx->sCurNalHead.eNalUnitType, 1)) { //VCL data, AU list should have data
+    PNalUnit pCurNal = pAu->pNalUnitsList[pAu->uiEndPos];
+    bAuBoundaryFlag = (pCtx->iTotalNumMbRec != 0)
+                      && (CheckAccessUnitBoundaryExt (&pCtx->sLastNalHdrExt, &pCurNal->sNalHeaderExt, &pCtx->sLastSliceHeader,
+                          &pCurNal->sNalData.sVclNal.sSliceHeaderExt.sSliceHeader));
+  } else { //non VCL
+    if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_AU_DELIMITER) {
+      bAuBoundaryFlag = true;
+    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SEI) {
+      bAuBoundaryFlag = true;
+    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SPS) { // parameter sets count as a boundary only when they overwrite a stored set
+      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SPS);
+    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_SUBSET_SPS) {
+      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_SUBSETSPS);
+    } else if (pCtx->sCurNalHead.eNalUnitType == NAL_UNIT_PPS) {
+      bAuBoundaryFlag = !! (pCtx->iOverwriteFlags & OVERWRITE_PPS);
+    }
+    if (bAuBoundaryFlag && pCtx->pAccessUnitList->uiAvailUnitsNum != 0) { //Construct remaining data first
+      ConstructAccessUnit (pCtx, ppDst, pDstInfo); // return value is not checked here
+    }
+  }
+
+  //Do Error Concealment here
+  if (bAuBoundaryFlag && (pCtx->iTotalNumMbRec != 0) && NeedErrorCon (pCtx)) { //AU ready but frame not completely reconed
+    if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+      ImplementErrorCon (pCtx);
+      pCtx->iTotalNumMbRec = pCtx->pSps->iMbWidth * pCtx->pSps->iMbHeight;
+      pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
+      pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
+
+      DecodeFrameConstruction (pCtx, ppDst, pDstInfo); // return value is not checked in this branch
+      pCtx->pPreviousDecodedPictureInDpb = pCtx->pDec; //save ECed pic for future use
+      if (pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) {
+        MarkECFrameAsRef (pCtx);
+      }
+    } else if (pCtx->pParam->bParseOnly) { //clear parse only internal data status
+      pCtx->pParserBsInfo->iNalNum = 0;
+      pCtx->bFrameFinish = true; //clear frame pending status here!
+    } else {
+      if (DecodeFrameConstruction (pCtx, ppDst, pDstInfo)) {
+        if ((pCtx->sLastNalHdrExt.sNalUnitHeader.uiNalRefIdc > 0) && (pCtx->sLastNalHdrExt.uiTemporalId == 0))
+          pCtx->iErrorCode |= dsNoParamSets;
+        else
+          pCtx->iErrorCode |= dsBitstreamError;
+        pCtx->pDec = NULL;
+        return false;
+      }
+    }
+    pCtx->pDec = NULL; // the picture is finished (or dropped) in every branch above
+    if (pAu->pNalUnitsList[pAu->uiStartPos]->sNalHeaderExt.sNalUnitHeader.uiNalRefIdc > 0)
+      pCtx->iPrevFrameNum = pCtx->sLastSliceHeader.iFrameNum; //save frame_num
+    if (pCtx->bLastHasMmco5)
+      pCtx->iPrevFrameNum = 0;
+  }
+  return ERR_NONE;
+}
+
+/*
+ * CheckRefPicturesComplete
+ * Check whether every LIST_0 reference picture used by the inter macroblocks of the current slice
+ * is complete (bIsComplete). Macroblock types not listed in the switch are ignored.
+ * return:
+ *  true  - all inspected reference pictures are complete
+ *  false - an incomplete reference picture was found, or FmoNextMb() returned -1
+ */
+bool CheckRefPicturesComplete (PWelsDecoderContext pCtx) {
+  // Multi Reference, RefIdx may differ
+  bool bAllRefComplete = true;
+  const int32_t kiFirstMbInSlice =
+    pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iFirstMbInSlice;
+  int32_t iRealMbIdx = kiFirstMbInSlice;
+  for (int32_t iMbIdx = 0; bAllRefComplete
+       && iMbIdx < pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.iTotalMbInCurSlice; iMbIdx++) {
+    // the indices 0, 2, 8, 10 below select the reference index stored for each partition of the MB
+    switch (pCtx->pCurDqLayer->pMbType[iRealMbIdx]) {
+    case MB_TYPE_SKIP:
+    case MB_TYPE_16x16:
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      break;
+
+    case MB_TYPE_16x8:
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
+      break;
+
+    case MB_TYPE_8x16:
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
+      break;
+
+    case MB_TYPE_8x8:
+    case MB_TYPE_8x8_REF0:
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][0] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][2] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][8] ]->bIsComplete;
+      bAllRefComplete &= pCtx->sRefPic.pRefList[ LIST_0 ][ pCtx->pCurDqLayer->pRefIndex[0][iRealMbIdx][10] ]->bIsComplete;
+      break;
+
+    default:
+      break;
+    }
+    // Advance to the macroblock that follows the one just checked. iMbIdx is incremented only after
+    // this statement, so the raster-order successor is first + iMbIdx + 1; using first + iMbIdx would
+    // re-check the first macroblock and never reach the last one of the slice.
+    // NOTE(review): in the FMO path a -1 from FmoNextMb() after the last macroblock also yields false -
+    // confirm that this is intended.
+    iRealMbIdx = (pCtx->pPps->uiNumSliceGroups > 1) ? FmoNextMb (pCtx->pFmo, iRealMbIdx) :
+                 (kiFirstMbInSlice + iMbIdx + 1);
+    if (iRealMbIdx == -1) //caused by abnormal return of FmoNextMb()
+      return false;
+  }
+  return bAllRefComplete;
+}
+} // namespace WelsDec
--- a/codec/decoder/core/src/error_concealment.cpp
+++ b/codec/decoder/core/src/error_concealment.cpp
@@ -267,7 +267,7 @@
     for (int32_t iMbX = 0; iMbX < iMbWidth; ++iMbX) {
       iMbXyIndex = iMbY * iMbWidth + iMbX;
       if (pMbCorrectlyDecodedFlag[iMbXyIndex] && IS_INTER (pCurDqLayer->pMbType[iMbXyIndex])) {
-        int32_t iMBType = pCurDqLayer->pMbType[iMbXyIndex];
+        uint32_t iMBType = pCurDqLayer->pMbType[iMbXyIndex];
         switch (iMBType) {
         case MB_TYPE_SKIP:
         case MB_TYPE_16x16:
--- a/codec/decoder/core/src/get_intra_predictor.cpp
+++ b/codec/decoder/core/src/get_intra_predictor.cpp
@@ -54,10 +54,10 @@
 void WelsI4x4LumaPredV_c (uint8_t* pPred, const int32_t kiStride) {
   const uint32_t kuiVal = LD32A4 (pPred - kiStride);
 
-  ST32A4 (pPred                                 , kuiVal);
-  ST32A4 (pPred + kiStride                      , kuiVal);
-  ST32A4 (pPred + (kiStride << 1)               , kuiVal);
-  ST32A4 (pPred + (kiStride << 1) + kiStride    , kuiVal);
+  ST32A4 (pPred, kuiVal);
+  ST32A4 (pPred + kiStride, kuiVal);
+  ST32A4 (pPred + (kiStride << 1), kuiVal);
+  ST32A4 (pPred + (kiStride << 1) + kiStride, kuiVal);
 }
 
 void WelsI4x4LumaPredH_c (uint8_t* pPred, const int32_t kiStride) {
@@ -68,8 +68,8 @@
   const uint32_t kuiL2 = 0x01010101U * pPred[-1 + kiStride2];
   const uint32_t kuiL3 = 0x01010101U * pPred[-1 + kiStride3];
 
-  ST32A4 (pPred            , kuiL0);
-  ST32A4 (pPred + kiStride , kuiL1);
+  ST32A4 (pPred, kuiL0);
+  ST32A4 (pPred + kiStride, kuiL1);
   ST32A4 (pPred + kiStride2, kuiL2);
   ST32A4 (pPred + kiStride3, kuiL3);
 }
@@ -81,8 +81,8 @@
                               pPred[-kiStride] + pPred[-kiStride + 1] + pPred[-kiStride + 2] + pPred[-kiStride + 3] + 4) >> 3;
   const uint32_t kuiMean32 = 0x01010101U * kuiMean;
 
-  ST32A4 (pPred            , kuiMean32);
-  ST32A4 (pPred + kiStride , kuiMean32);
+  ST32A4 (pPred, kuiMean32);
+  ST32A4 (pPred + kiStride, kuiMean32);
   ST32A4 (pPred + kiStride2, kuiMean32);
   ST32A4 (pPred + kiStride3, kuiMean32);
 }
@@ -93,8 +93,8 @@
   const uint8_t kuiMean    = (pPred[-1] + pPred[-1 + kiStride] + pPred[-1 + kiStride2] + pPred[-1 + kiStride3] + 2) >> 2;
   const uint32_t kuiMean32 = 0x01010101U * kuiMean;
 
-  ST32A4 (pPred            , kuiMean32);
-  ST32A4 (pPred + kiStride , kuiMean32);
+  ST32A4 (pPred, kuiMean32);
+  ST32A4 (pPred + kiStride, kuiMean32);
   ST32A4 (pPred + kiStride2, kuiMean32);
   ST32A4 (pPred + kiStride3, kuiMean32);
 }
@@ -106,8 +106,8 @@
                              >> 2;
   const uint32_t kuiMean32 = 0x01010101U * kuiMean;
 
-  ST32A4 (pPred            , kuiMean32);
-  ST32A4 (pPred + kiStride , kuiMean32);
+  ST32A4 (pPred, kuiMean32);
+  ST32A4 (pPred + kiStride, kuiMean32);
   ST32A4 (pPred + kiStride2, kuiMean32);
   ST32A4 (pPred + kiStride3, kuiMean32);
 }
@@ -115,9 +115,9 @@
 void WelsI4x4LumaPredDcNA_c (uint8_t* pPred, const int32_t kiStride) {
   const uint32_t kuiDC32 = 0x80808080U;
 
-  ST32A4 (pPred                             , kuiDC32);
-  ST32A4 (pPred + kiStride                  , kuiDC32);
-  ST32A4 (pPred + (kiStride << 1)           , kuiDC32);
+  ST32A4 (pPred, kuiDC32);
+  ST32A4 (pPred + kiStride, kuiDC32);
+  ST32A4 (pPred + (kiStride << 1), kuiDC32);
   ST32A4 (pPred + (kiStride << 1) + kiStride, kuiDC32);
 }
 
@@ -144,8 +144,8 @@
   const uint8_t kuiDDL6 = (2 + kuiT6 + kuiT7 + (kuiT7 << 1)) >> 2;      // kDDL6
   const uint8_t kuiList[8] = { kuiDDL0, kuiDDL1, kuiDDL2, kuiDDL3, kuiDDL4, kuiDDL5, kuiDDL6, 0 };
 
-  ST32A4 (pPred            , LD32 (kuiList));
-  ST32A4 (pPred + kiStride , LD32 (kuiList + 1));
+  ST32A4 (pPred, LD32 (kuiList));
+  ST32A4 (pPred + kiStride, LD32 (kuiList + 1));
   ST32A4 (pPred + kiStride2, LD32 (kuiList + 2));
   ST32A4 (pPred + kiStride3, LD32 (kuiList + 3));
 }
@@ -168,7 +168,7 @@
   const uint8_t kuiDLT1 = (kuiT12 + kuiT23) >> 2;       // kDLT1
   const uint8_t kuiDLT2 = (kuiT23 + kuiT33) >> 2;       // kDLT2
   const uint8_t kuiDLT3 = kuiT33 >> 1;                  // kDLT3
-  const uint8_t kuiList[8] = { kuiDLT0, kuiDLT1, kuiDLT2, kuiDLT3, kuiDLT3, kuiDLT3, kuiDLT3 , kuiDLT3 };
+  const uint8_t kuiList[8] = { kuiDLT0, kuiDLT1, kuiDLT2, kuiDLT3, kuiDLT3, kuiDLT3, kuiDLT3, kuiDLT3 };
 
   ST32A4 (pPred,             LD32 (kuiList));
   ST32A4 (pPred + kiStride,  LD32 (kuiList + 1));
@@ -210,8 +210,8 @@
   const uint8_t kuiDDR6 = (kuiL12 + kuiL23) >> 2;       // kuiDDR6
   const uint8_t kuiList[8] = { kuiDDR6, kuiDDR5, kuiDDR4, kuiDDR0, kuiDDR1, kuiDDR2, kuiDDR3, 0 };
 
-  ST32A4 (pPred            , LD32 (kuiList + 3));
-  ST32A4 (pPred + kiStride , LD32 (kuiList + 2));
+  ST32A4 (pPred, LD32 (kuiList + 3));
+  ST32A4 (pPred + kiStride, LD32 (kuiList + 2));
   ST32A4 (pPred + kiStride2, LD32 (kuiList + 1));
   ST32A4 (pPred + kiStride3, LD32 (kuiList));
 }
@@ -278,8 +278,8 @@
   const uint8_t kuiVL7          = kuiVL3;
   const uint8_t kuiList[10]     = { kuiVL0, kuiVL1, kuiVL2, kuiVL3, kuiVL3, kuiVL4, kuiVL5, kuiVL6, kuiVL7, kuiVL7 };
 
-  ST32A4 (pPred            , LD32 (kuiList));
-  ST32A4 (pPred + kiStride , LD32 (kuiList + 5));
+  ST32A4 (pPred, LD32 (kuiList));
+  ST32A4 (pPred + kiStride, LD32 (kuiList + 5));
   ST32A4 (pPred + kiStride2, LD32 (kuiList + 1));
   ST32A4 (pPred + kiStride3, LD32 (kuiList + 6));
 }
@@ -310,8 +310,8 @@
   const uint8_t kuiVR9          = (2 + kuiL0 + (kuiL1 << 1) + kuiL2) >> 2;      // kuiVR9
   const uint8_t kuiList[10]     = { kuiVR8, kuiVR0, kuiVR1, kuiVR2, kuiVR3, kuiVR9, kuiVR4, kuiVR5, kuiVR6, kuiVR7 };
 
-  ST32A4 (pPred            , LD32 (kuiList + 1));
-  ST32A4 (pPred + kiStride , LD32 (kuiList + 6));
+  ST32A4 (pPred, LD32 (kuiList + 1));
+  ST32A4 (pPred + kiStride, LD32 (kuiList + 6));
   ST32A4 (pPred + kiStride2, LD32 (kuiList));
   ST32A4 (pPred + kiStride3, LD32 (kuiList + 5));
 }
@@ -336,8 +336,8 @@
   const uint8_t kuiHU5          = (1 + kuiL23 + (kuiL3 << 1)) >> 2;
   const uint8_t kuiList[10]     = { kuiHU0, kuiHU1, kuiHU2, kuiHU3, kuiHU4, kuiHU5, kuiL3, kuiL3, kuiL3, kuiL3 };
 
-  ST32A4 (pPred            , LD32 (kuiList));
-  ST32A4 (pPred + kiStride , LD32 (kuiList + 2));
+  ST32A4 (pPred, LD32 (kuiList));
+  ST32A4 (pPred + kiStride, LD32 (kuiList + 2));
   ST32A4 (pPred + kiStride2, LD32 (kuiList + 4));
   ST32A4 (pPred + kiStride3, LD32 (kuiList + 6));
 }
@@ -374,8 +374,8 @@
   const uint8_t kuiHD9          = (kuiL12 + kuiL23) >> 2;
   const uint8_t kuiList[10]     = { kuiHD8, kuiHD9, kuiHD6, kuiHD7, kuiHD4, kuiHD5, kuiHD0, kuiHD1, kuiHD2, kuiHD3 };
 
-  ST32A4 (pPred            , LD32 (kuiList + 6));
-  ST32A4 (pPred + kiStride , LD32 (kuiList + 4));
+  ST32A4 (pPred, LD32 (kuiList + 6));
+  ST32A4 (pPred + kiStride, LD32 (kuiList + 4));
   ST32A4 (pPred + kiStride2, LD32 (kuiList + 2));
   ST32A4 (pPred + kiStride3, LD32 (kuiList));
 }
@@ -886,14 +886,14 @@
   const int32_t kiStride2 = kiStride  << 1;
   const int32_t kiStride4 = kiStride2 << 1;
 
-  ST64A8 (pPred                        , kuiVal64);
-  ST64A8 (pPred + kiStride               , kuiVal64);
-  ST64A8 (pPred + kiStride2              , kuiVal64);
-  ST64A8 (pPred + kiStride2 + kiStride     , kuiVal64);
-  ST64A8 (pPred + kiStride4              , kuiVal64);
-  ST64A8 (pPred + kiStride4 + kiStride     , kuiVal64);
-  ST64A8 (pPred + kiStride4 + kiStride2    , kuiVal64);
-  ST64A8 (pPred + (kiStride << 3) - kiStride , kuiVal64);
+  ST64A8 (pPred, kuiVal64);
+  ST64A8 (pPred + kiStride, kuiVal64);
+  ST64A8 (pPred + kiStride2, kuiVal64);
+  ST64A8 (pPred + kiStride2 + kiStride, kuiVal64);
+  ST64A8 (pPred + kiStride4, kuiVal64);
+  ST64A8 (pPred + kiStride4 + kiStride, kuiVal64);
+  ST64A8 (pPred + kiStride4 + kiStride2, kuiVal64);
+  ST64A8 (pPred + (kiStride << 3) - kiStride, kuiVal64);
 }
 
 void WelsIChromaPredH_c (uint8_t* pPred, const int32_t kiStride) {
@@ -958,7 +958,7 @@
   const uint64_t kuiUP64        = LD64 (kuiMUP);
   const uint64_t kuiDN64        = LD64 (kuiMDown);
 
-  ST64A8 (pPred       , kuiUP64);
+  ST64A8 (pPred, kuiUP64);
   ST64A8 (pPred + kiL1 + 1, kuiUP64);
   ST64A8 (pPred + kiL2 + 1, kuiUP64);
   ST64A8 (pPred + kiL3 + 1, kuiUP64);
@@ -982,7 +982,7 @@
   const uint64_t kuiUP64 = 0x0101010101010101ULL * kuiMUP;
   const uint64_t kuiDN64 = 0x0101010101010101ULL * kuiMDown;
 
-  ST64A8 (pPred       , kuiUP64);
+  ST64A8 (pPred, kuiUP64);
   ST64A8 (pPred + kiL1 + 1, kuiUP64);
   ST64A8 (pPred + kiL2 + 1, kuiUP64);
   ST64A8 (pPred + kiL3 + 1, kuiUP64);
@@ -1028,7 +1028,7 @@
   uint8_t i = 15;
 
   do {
-    ST64A8 (pPred + iTmp    , kuiTop1);
+    ST64A8 (pPred + iTmp, kuiTop1);
     ST64A8 (pPred + iTmp + 8, kuiTop2);
 
     iTmp -= kiStride;
@@ -1043,7 +1043,7 @@
     const uint8_t kuiVal8   = pPred[iTmp - 1];
     const uint64_t kuiVal64 = 0x0101010101010101ULL * kuiVal8;
 
-    ST64A8 (pPred + iTmp    , kuiVal64);
+    ST64A8 (pPred + iTmp, kuiVal64);
     ST64A8 (pPred + iTmp + 8, kuiVal64);
 
     iTmp -= kiStride;
@@ -1134,7 +1134,7 @@
   iTmp = (kiStride << 4) - kiStride;
   i = 15;
   do {
-    ST64A8 (pPred + iTmp  , uiMean64);
+    ST64A8 (pPred + iTmp, uiMean64);
     ST64A8 (pPred + iTmp + 8, uiMean64);
 
     iTmp -= kiStride;
--- a/codec/decoder/core/src/manage_dec_ref.cpp
+++ b/codec/decoder/core/src/manage_dec_ref.cpp
@@ -1,630 +1,897 @@
-/*!
- * \copy
- *     Copyright (c)  2008-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- *  manage_dec_ref.cpp
- *
- *  Abstract
- *      Implementation for managing reference picture
- *
- *  History
- *      07/21/2008 Created
- *
- *****************************************************************************/
-
-#include "manage_dec_ref.h"
-#include "error_concealment.h"
-#include "error_code.h"
-
-namespace WelsDec {
-
-static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum);
-static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameIdx);
-static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum);
-static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx);
-
-static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking);
-static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
-                            int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx);
-static int32_t SlidingWindow (PWelsDecoderContext pCtx);
-
-static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic);
-static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx);
-static int32_t MarkAsLongTerm (PRefPic pRefPic, int32_t iFrameNum, int32_t iLongTermFrameIdx);
-#ifdef LONG_TERM_REF
-int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum);
-#endif
-static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx);
-
-static void SetUnRef (PPicture pRef) {
-  if (NULL != pRef) {
-    pRef->bUsedAsRef = false;
-    pRef->bIsLongRef = false;
-    pRef->iFrameNum = -1;
-    //pRef->iFramePoc = 0;
-    pRef->iLongTermFrameIdx = -1;
-    pRef->uiQualityId = -1;
-    pRef->uiTemporalId = -1;
-    pRef->uiSpatialId = -1;
-    pRef->iSpsId = -1;
-    pRef->bIsComplete = false;
-  }
-}
-
-//reset pRefList when
-// 1.sps arrived that is new sequence starting
-// 2.IDR NAL i.e. 1st layer in IDR AU
-
-void WelsResetRefPic (PWelsDecoderContext pCtx) {
-  int32_t i = 0;
-  PRefPic pRefPic = &pCtx->sRefPic;
-  pCtx->sRefPic.uiLongRefCount[LIST_0] = pCtx->sRefPic.uiShortRefCount[LIST_0] = 0;
-
-  pRefPic->uiRefCount[LIST_0] = 0;
-
-  for (i = 0; i < MAX_DPB_COUNT; i++) {
-    if (pRefPic->pShortRefList[LIST_0][i] != NULL) {
-      SetUnRef (pRefPic->pShortRefList[LIST_0][i]);
-      pRefPic->pShortRefList[LIST_0][i] = NULL;
-    }
-  }
-  pRefPic->uiShortRefCount[LIST_0] = 0;
-
-  for (i = 0; i < MAX_DPB_COUNT; i++) {
-    if (pRefPic->pLongRefList[LIST_0][i] != NULL) {
-      SetUnRef (pRefPic->pLongRefList[LIST_0][i]);
-      pRefPic->pLongRefList[LIST_0][i] = NULL;
-    }
-  }
-  pRefPic->uiLongRefCount[LIST_0] = 0;
-}
-
-/**
- * fills the pRefPic.pRefList.
- */
-int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc) {
-  int32_t i, iCount = 0;
-
-  if ((pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0) && (pCtx->eSliceType != I_SLICE
-      && pCtx->eSliceType != SI_SLICE)) {
-    if (pCtx->pParam->eEcActiveIdc !=
-        ERROR_CON_DISABLE) { //IDR lost!, recover it for future decoding with data all set to 0
-      PPicture pRef = PrefetchPic (pCtx->pPicBuff[0]);
-      if (pRef != NULL) {
-        // IDR lost, set new
-        pRef->bIsComplete = false; // Set complete flag to false for lost IDR ref picture
-        pRef->iSpsId = pCtx->pSps->iSpsId;
-        pRef->iPpsId = pCtx->pPps->iPpsId;
-        pCtx->iErrorCode |= dsDataErrorConcealed;
-        bool bCopyPrevious = ((ERROR_CON_FRAME_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
-                              || (ERROR_CON_SLICE_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
-                              || (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)
-                              || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
-                              || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc))
-                             && (NULL != pCtx->pPreviousDecodedPictureInDpb);
-        bCopyPrevious = bCopyPrevious && (pRef->iWidthInPixel == pCtx->pPreviousDecodedPictureInDpb->iWidthInPixel)
-                        && (pRef->iHeightInPixel == pCtx->pPreviousDecodedPictureInDpb->iHeightInPixel);
-
-        if (!bCopyPrevious) {
-          memset (pRef->pData[0], 128, pRef->iLinesize[0] * pRef->iHeightInPixel);
-          memset (pRef->pData[1], 128, pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
-          memset (pRef->pData[2], 128, pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
-        } else if (pRef == pCtx->pPreviousDecodedPictureInDpb) {
-          WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsInitRefList()::EC memcpy overlap.");
-        } else {
-          memcpy (pRef->pData[0], pCtx->pPreviousDecodedPictureInDpb->pData[0], pRef->iLinesize[0] * pRef->iHeightInPixel);
-          memcpy (pRef->pData[1], pCtx->pPreviousDecodedPictureInDpb->pData[1], pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
-          memcpy (pRef->pData[2], pCtx->pPreviousDecodedPictureInDpb->pData[2], pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
-        }
-        pRef->iFrameNum = 0;
-        pRef->iFramePoc = 0;
-        pRef->uiTemporalId = pRef->uiQualityId = 0;
-        ExpandReferencingPicture (pRef->pData, pRef->iWidthInPixel, pRef->iHeightInPixel, pRef->iLinesize,
-                                  pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
-        AddShortTermToList (&pCtx->sRefPic, pRef);
-      } else {
-        WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "WelsInitRefList()::PrefetchPic for EC errors.");
-        pCtx->iErrorCode |= dsOutOfMemory;
-        return ERR_INFO_REF_COUNT_OVERFLOW;
-      }
-    }
-  }
-
-  PPicture* ppShoreRefList = pCtx->sRefPic.pShortRefList[LIST_0];
-  PPicture* ppLongRefList  = pCtx->sRefPic.pLongRefList[LIST_0];
-  memset (pCtx->sRefPic.pRefList[LIST_0], 0, MAX_DPB_COUNT * sizeof (PPicture));
-  //short
-  for (i = 0; i < pCtx->sRefPic.uiShortRefCount[LIST_0]; ++i) {
-    pCtx->sRefPic.pRefList[LIST_0][iCount++ ] = ppShoreRefList[i];
-  }
-
-  //long
-  for (i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0] ; ++i) {
-    pCtx->sRefPic.pRefList[LIST_0][iCount++  ] = ppLongRefList[i];
-  }
-  pCtx->sRefPic.uiRefCount[LIST_0] = iCount;
-
-  return ERR_NONE;
-}
-
-int32_t WelsReorderRefList (PWelsDecoderContext pCtx) {
-  PRefPicListReorderSyn pRefPicListReorderSyn = pCtx->pCurDqLayer->pRefPicListReordering;
-  PNalUnitHeaderExt pNalHeaderExt = &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt;
-  PSliceHeader pSliceHeader = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader;
-  PPicture pPic = NULL;
-  PPicture* ppRefList = pCtx->sRefPic.pRefList[LIST_0];
-  int32_t iMaxRefIdx = pCtx->pSps->iNumRefFrames;
-  int32_t iRefCount = pCtx->sRefPic.uiRefCount[LIST_0];
-  int32_t iPredFrameNum = pSliceHeader->iFrameNum;
-  int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum;
-  int32_t iAbsDiffPicNum = -1;
-  int32_t iReorderingIndex = 0;
-  int32_t i = 0;
-
-  if (pCtx->eSliceType == I_SLICE || pCtx->eSliceType == SI_SLICE) {
-    return ERR_NONE;
-  }
-
-  if (iRefCount <= 0) {
-    pCtx->iErrorCode = dsNoParamSets; //No any reference for decoding, SHOULD request IDR
-    return ERR_INFO_REFERENCE_PIC_LOST;
-  }
-
-  if (pRefPicListReorderSyn->bRefPicListReorderingFlag[LIST_0]) {
-    while ((iReorderingIndex < iMaxRefIdx)
-           && (pRefPicListReorderSyn->sReorderingSyn[LIST_0][iReorderingIndex].uiReorderingOfPicNumsIdc != 3)) {
-      uint16_t uiReorderingOfPicNumsIdc =
-        pRefPicListReorderSyn->sReorderingSyn[LIST_0][iReorderingIndex].uiReorderingOfPicNumsIdc;
-      if (uiReorderingOfPicNumsIdc < 2) {
-        iAbsDiffPicNum = pRefPicListReorderSyn->sReorderingSyn[LIST_0][iReorderingIndex].uiAbsDiffPicNumMinus1 + 1;
-
-        if (uiReorderingOfPicNumsIdc == 0) {
-          iPredFrameNum -= iAbsDiffPicNum;
-        } else {
-          iPredFrameNum += iAbsDiffPicNum;
-        }
-        iPredFrameNum &= iMaxPicNum - 1;
-
-        for (i = iMaxRefIdx - 1; i >= 0; i--) {
-          if (ppRefList[i] != NULL && ppRefList[i]->iFrameNum == iPredFrameNum && !ppRefList[i]->bIsLongRef) {
-            if ((pNalHeaderExt->uiQualityId == ppRefList[i]->uiQualityId)
-                && (pSliceHeader->iSpsId != ppRefList[i]->iSpsId)) {   //check;
-              WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsReorderRefList()::::BASE LAYER::::iSpsId:%d, ref_sps_id:%d",
-                       pSliceHeader->iSpsId, ppRefList[i]->iSpsId);
-              pCtx->iErrorCode = dsNoParamSets; //cross-IDR reference frame selection, SHOULD request IDR.--
-              return ERR_INFO_REFERENCE_PIC_LOST;
-            } else {
-              break;
-            }
-          }
-        }
-
-      } else if (uiReorderingOfPicNumsIdc == 2) {
-        for (i = iMaxRefIdx - 1; i >= 0; i--) {
-          if (ppRefList[i] != NULL && ppRefList[i]->bIsLongRef
-              && ppRefList[i]->iLongTermFrameIdx ==
-              pRefPicListReorderSyn->sReorderingSyn[LIST_0][iReorderingIndex].uiLongTermPicNum) {
-            if ((pNalHeaderExt->uiQualityId == ppRefList[i]->uiQualityId)
-                && (pSliceHeader->iSpsId != ppRefList[i]->iSpsId)) {    //check;
-              WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsReorderRefList()::::BASE LAYER::::iSpsId:%d, ref_sps_id:%d",
-                       pSliceHeader->iSpsId, ppRefList[i]->iSpsId);
-              pCtx->iErrorCode = dsNoParamSets; //cross-IDR reference frame selection, SHOULD request IDR.--
-              return ERR_INFO_REFERENCE_PIC_LOST;
-            } else {
-              break;
-            }
-          }
-        }
-      }
-      if (i < 0) {
-        return ERR_INFO_REFERENCE_PIC_LOST;
-      }
-      pPic = ppRefList[i];
-      if (i > iReorderingIndex) {
-        memmove (&ppRefList[1 + iReorderingIndex], &ppRefList[iReorderingIndex],
-                 (i - iReorderingIndex)*sizeof (PPicture)); //confirmed_safe_unsafe_usage
-      } else if (i < iReorderingIndex) {
-        memmove (&ppRefList[1 + iReorderingIndex], &ppRefList[iReorderingIndex],
-                 (iMaxRefIdx - iReorderingIndex)*sizeof (PPicture));
-      }
-      ppRefList[iReorderingIndex] = pPic;
-      iReorderingIndex++;
-    }
-  }
-  return ERR_NONE;
-}
-
-int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
-  PRefPic pRefPic = &pCtx->sRefPic;
-  PRefPicMarking pRefPicMarking = pCtx->pCurDqLayer->pRefPicMarking;
-  PAccessUnit pCurAU = pCtx->pAccessUnitList;
-  bool bIsIDRAU = false;
-  uint32_t j;
-
-  int32_t iRet = ERR_NONE;
-
-  pCtx->pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
-  pCtx->pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId;
-  pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
-  pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
-
-  for (j = pCurAU->uiStartPos; j <= pCurAU->uiEndPos; j++) {
-    if (pCurAU->pNalUnitsList[j]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR
-        || pCurAU->pNalUnitsList[j]->sNalHeaderExt.bIdrFlag) {
-      bIsIDRAU = true;
-      break;
-    }
-  }
-  if (bIsIDRAU) {
-    if (pRefPicMarking->bLongTermRefFlag) {
-      pCtx->sRefPic.iMaxLongTermFrameIdx = 0;
-      AddLongTermToList (pRefPic, pCtx->pDec, 0);
-    } else {
-      pCtx->sRefPic.iMaxLongTermFrameIdx = -1;
-    }
-  } else {
-    if (pRefPicMarking->bAdaptiveRefPicMarkingModeFlag) {
-      iRet = MMCO (pCtx, pRefPicMarking);
-      if (iRet != ERR_NONE) {
-        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-          iRet = RemainOneBufferInDpbForEC (pCtx);
-          WELS_VERIFY_RETURN_IF (iRet, iRet);
-        } else {
-          return iRet;
-        }
-      }
-
-      if (pCtx->bLastHasMmco5) {
-        pCtx->pDec->iFrameNum = 0;
-        pCtx->pDec->iFramePoc = 0;
-      }
-
-    } else {
-      iRet = SlidingWindow (pCtx);
-      if (iRet != ERR_NONE) {
-        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-          iRet = RemainOneBufferInDpbForEC (pCtx);
-          WELS_VERIFY_RETURN_IF (iRet, iRet);
-        } else {
-          return iRet;
-        }
-      }
-    }
-  }
-
-  if (!pCtx->pDec->bIsLongRef) {
-    if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) {
-      if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-        iRet = RemainOneBufferInDpbForEC (pCtx);
-        WELS_VERIFY_RETURN_IF (iRet, iRet);
-      } else {
-        return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
-      }
-    }
-    iRet = AddShortTermToList (pRefPic, pCtx->pDec);
-  }
-
-  return iRet;
-}
-
-static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
-  PSps pSps = pCtx->pCurDqLayer->sLayerInfo.pSps;
-  int32_t i = 0;
-  int32_t iRet = ERR_NONE;
-  for (i = 0; i < MAX_MMCO_COUNT && pRefPicMarking->sMmcoRef[i].uiMmcoType != MMCO_END; i++) {
-    uint32_t uiMmcoType = pRefPicMarking->sMmcoRef[i].uiMmcoType;
-    int32_t iShortFrameNum = (pCtx->iFrameNum - pRefPicMarking->sMmcoRef[i].iDiffOfPicNum) & ((
-                               1 << pSps->uiLog2MaxFrameNum) - 1);
-    uint32_t uiLongTermPicNum = pRefPicMarking->sMmcoRef[i].uiLongTermPicNum;
-    int32_t iLongTermFrameIdx = pRefPicMarking->sMmcoRef[i].iLongTermFrameIdx;
-    int32_t iMaxLongTermFrameIdx = pRefPicMarking->sMmcoRef[i].iMaxLongTermFrameIdx;
-    if (uiMmcoType > MMCO_LONG) {
-      return ERR_INFO_INVALID_MMCO_OPCODE_BASE;
-    }
-    iRet = MMCOProcess (pCtx, uiMmcoType, iShortFrameNum, uiLongTermPicNum, iLongTermFrameIdx, iMaxLongTermFrameIdx);
-    if (iRet != ERR_NONE) {
-      return iRet;
-    }
-  }
-  if (i == MAX_MMCO_COUNT) { //although Rec does not handle this condition, we here prohibit too many MMCO op
-    return ERR_INFO_INVALID_MMCO_NUM;
-  }
-
-  return ERR_NONE;
-}
-static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
-                            int32_t iShortFrameNum, uint32_t uiLongTermPicNum , int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx) {
-  PRefPic pRefPic = &pCtx->sRefPic;
-  PPicture pPic = NULL;
-  int32_t i = 0;
-  int32_t iRet = ERR_NONE;
-
-  switch (uiMmcoType) {
-  case MMCO_SHORT2UNUSED:
-    pPic = WelsDelShortFromListSetUnref (pRefPic, iShortFrameNum);
-    if (pPic == NULL) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_SHORT2UNUSED: delete an empty entry from short term list");
-    }
-    break;
-  case MMCO_LONG2UNUSED:
-    pPic = WelsDelLongFromListSetUnref (pRefPic, uiLongTermPicNum);
-    if (pPic == NULL) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_LONG2UNUSED: delete an empty entry from long term list");
-    }
-    break;
-  case MMCO_SHORT2LONG:
-    if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
-      return ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX;
-    }
-    pPic = WelsDelShortFromList (pRefPic, iShortFrameNum);
-    if (pPic == NULL) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_LONG2LONG: delete an empty entry from short term list");
-      break;
-    }
-    WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx);
-#ifdef LONG_TERM_REF
-    pCtx->bCurAuContainLtrMarkSeFlag = true;
-    pCtx->iFrameNumOfAuMarkedLtr      = iShortFrameNum;
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "ex_mark_avc():::MMCO_SHORT2LONG:::LTR marking....iFrameNum: %d",
-             pCtx->iFrameNumOfAuMarkedLtr);
-#endif
-
-    MarkAsLongTerm (pRefPic, iShortFrameNum, iLongTermFrameIdx);
-    break;
-  case MMCO_SET_MAX_LONG:
-    pRefPic->iMaxLongTermFrameIdx = iMaxLongTermFrameIdx;
-    for (i = 0 ; i < pRefPic->uiLongRefCount[LIST_0]; i++) {
-      if (pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
-        WelsDelLongFromListSetUnref (pRefPic, pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx);
-      }
-    }
-    break;
-  case MMCO_RESET:
-    WelsResetRefPic (pCtx);
-    pCtx->bLastHasMmco5 = true;
-    break;
-  case MMCO_LONG:
-    if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
-      return ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX;
-    }
-    WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx);
-    if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) {
-      return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
-    }
-#ifdef LONG_TERM_REF
-    pCtx->bCurAuContainLtrMarkSeFlag = true;
-    pCtx->iFrameNumOfAuMarkedLtr      = pCtx->iFrameNum;
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "ex_mark_avc():::MMCO_LONG:::LTR marking....iFrameNum: %d",
-             pCtx->iFrameNum);
-#endif
-    iRet = AddLongTermToList (pRefPic, pCtx->pDec, iLongTermFrameIdx);
-    break;
-  default :
-    break;
-  }
-
-  return iRet;
-}
-
-static int32_t SlidingWindow (PWelsDecoderContext pCtx) {
-  PRefPic pRefPic = &pCtx->sRefPic;
-  PPicture pPic = NULL;
-  int32_t i = 0;
-
-  if (pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] >= pCtx->pSps->iNumRefFrames) {
-    if (pCtx->sRefPic.uiShortRefCount[LIST_0] == 0) {
-      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "No reference picture in short term list when sliding window");
-      return ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH;
-    }
-    for (i = pRefPic->uiShortRefCount[LIST_0] - 1; i >= 0; i--) {
-      pPic = WelsDelShortFromList (pRefPic, pRefPic->pShortRefList[LIST_0][i]->iFrameNum);
-      if (pPic) {
-        SetUnRef (pPic);
-        break;
-      } else {
-        return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
-      }
-    }
-  }
-  return ERR_NONE;
-}
-
-static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) {
-  int32_t i = 0;
-  int32_t iMoveSize = 0;
-  PPicture pPic = NULL;
-
-  for (i = 0; i < pRefPic->uiShortRefCount[LIST_0]; i++) {
-    if (pRefPic->pShortRefList[LIST_0][i]->iFrameNum == iFrameNum) {
-      iMoveSize = pRefPic->uiShortRefCount[LIST_0] - i - 1;
-      pRefPic->pShortRefList[LIST_0][i]->bUsedAsRef = false;
-      pPic = pRefPic->pShortRefList[LIST_0][i];
-      pRefPic->pShortRefList[LIST_0][i] = NULL;
-      if (iMoveSize > 0) {
-        memmove (&pRefPic->pShortRefList[LIST_0][i], &pRefPic->pShortRefList[LIST_0][i + 1],
-                 iMoveSize * sizeof (PPicture)); //confirmed_safe_unsafe_usage
-      }
-      pRefPic->uiShortRefCount[LIST_0]--;
-      pRefPic->pShortRefList[LIST_0][pRefPic->uiShortRefCount[LIST_0]] = NULL;
-      break;
-    }
-  }
-
-  return pPic;
-}
-
-static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum) {
-  PPicture pPic = WelsDelShortFromList (pRefPic, iFrameNum);
-  if (pPic) {
-    SetUnRef (pPic);
-  }
-  return pPic;
-}
-
-static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameIdx) {
-  PPicture pPic = NULL;
-  int32_t i = 0;
-  for (i = 0; i < pRefPic->uiLongRefCount[LIST_0]; i++) {
-    pPic = pRefPic->pLongRefList[LIST_0][i];
-    if (pPic->iLongTermFrameIdx == (int32_t)uiLongTermFrameIdx) {
-      int32_t iMoveSize = pRefPic->uiLongRefCount[LIST_0] - i - 1;
-      pPic->bUsedAsRef = false;
-      pPic->bIsLongRef = false;
-      if (iMoveSize > 0) {
-        memmove (&pRefPic->pLongRefList[LIST_0][i], &pRefPic->pLongRefList[LIST_0][i + 1],
-                 iMoveSize * sizeof (PPicture)); //confirmed_safe_unsafe_usage
-      }
-      pRefPic->uiLongRefCount[LIST_0]--;
-      pRefPic->pLongRefList[LIST_0][pRefPic->uiLongRefCount[LIST_0]] = NULL;
-      return pPic;
-    }
-  }
-  return NULL;
-}
-
-static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx) {
-  PPicture pPic = WelsDelLongFromList (pRefPic, uiLongTermFrameIdx);
-  if (pPic) {
-    SetUnRef (pPic);
-  }
-  return pPic;
-}
-
-static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic) {
-  pPic->bUsedAsRef = true;
-  pPic->bIsLongRef = false;
-  pPic->iLongTermFrameIdx = -1;
-  if (pRefPic->uiShortRefCount[LIST_0] > 0) {
-    // Check the duplicate frame_num in short ref list
-    for (int32_t iPos = 0; iPos < pRefPic->uiShortRefCount[LIST_0]; iPos++) {
-      if (pPic->iFrameNum == pRefPic->pShortRefList[LIST_0][iPos]->iFrameNum) {
-        // Replace the previous ref pic with the new one with the same frame_num
-        pRefPic->pShortRefList[LIST_0][iPos] = pPic;
-        return ERR_INFO_DUPLICATE_FRAME_NUM;
-      }
-    }
-
-    memmove (&pRefPic->pShortRefList[LIST_0][1], &pRefPic->pShortRefList[LIST_0][0],
-             pRefPic->uiShortRefCount[LIST_0]*sizeof (PPicture));//confirmed_safe_unsafe_usage
-  }
-  pRefPic->pShortRefList[LIST_0][0] = pPic;
-  pRefPic->uiShortRefCount[LIST_0]++;
-  return ERR_NONE;
-}
-
-static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx) {
-  int32_t i = 0;
-
-  pPic->bUsedAsRef = true;
-  pPic->bIsLongRef = true;
-  pPic->iLongTermFrameIdx = iLongTermFrameIdx;
-  if (pRefPic->uiLongRefCount[LIST_0] == 0) {
-    pRefPic->pLongRefList[LIST_0][pRefPic->uiLongRefCount[LIST_0]] = pPic;
-  } else {
-    for (i = 0; i < pRefPic->uiLongRefCount[LIST_0]; i++) {
-      if (pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx > pPic->iLongTermFrameIdx) {
-        break;
-      }
-    }
-    memmove (&pRefPic->pLongRefList[LIST_0][i + 1], &pRefPic->pLongRefList[LIST_0][i],
-             (pRefPic->uiLongRefCount[LIST_0] - i)*sizeof (PPicture)); //confirmed_safe_unsafe_usage
-    pRefPic->pLongRefList[LIST_0][i] = pPic;
-  }
-
-  pRefPic->uiLongRefCount[LIST_0]++;
-  return ERR_NONE;
-}
-
-static int32_t MarkAsLongTerm (PRefPic pRefPic, int32_t iFrameNum, int32_t iLongTermFrameIdx) {
-  PPicture pPic = NULL;
-  int32_t i = 0;
-  int32_t iRet = ERR_NONE;
-  WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx);
-
-  for (i = 0; i < pRefPic->uiRefCount[LIST_0]; i++) {
-    pPic = pRefPic->pRefList[LIST_0][i];
-    if (pPic->iFrameNum == iFrameNum && !pPic->bIsLongRef) {
-      iRet = AddLongTermToList (pRefPic, pPic, iLongTermFrameIdx);
-      break;
-    }
-  }
-
-  return iRet;
-}
-
-#ifdef LONG_TERM_REF
-int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum) {
-  int32_t iLTRFrameIndex = -1;
-  PPicture pPic;
-  for (int i = 0; i < pRefPic->uiLongRefCount[0]; ++i) {
-    pPic = pRefPic->pLongRefList[LIST_0][i];
-    if (pPic->iFrameNum == iAncLTRFrameNum) {
-      return (pPic->iLongTermFrameIdx);
-    }
-  }
-  return iLTRFrameIndex;
-}
-#endif
-
-static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx) {
-  int32_t iRet = ERR_NONE;
-  PRefPic pRefPic = &pCtx->sRefPic;
-  if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] < pCtx->pSps->iNumRefFrames)
-    return iRet;
-
-  if (pRefPic->uiShortRefCount[0] > 0) {
-    iRet = SlidingWindow (pCtx);
-  } else { //all LTR, remove the smallest long_term_frame_idx
-    int32_t iLongTermFrameIdx = 0;
-    int32_t iMaxLongTermFrameIdx = pRefPic->iMaxLongTermFrameIdx;
-#ifdef LONG_TERM_REF
-    int32_t iCurrLTRFrameIdx = GetLTRFrameIndex (pRefPic, pCtx->iFrameNumOfAuMarkedLtr);
-#endif
-    while ((pRefPic->uiLongRefCount[0] >= pCtx->pSps->iNumRefFrames) && (iLongTermFrameIdx <= iMaxLongTermFrameIdx)) {
-#ifdef LONG_TERM_REF
-      if (iLongTermFrameIdx == iCurrLTRFrameIdx) {
-        iLongTermFrameIdx++;
-        continue;
-      }
-#endif
-      WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx);
-      iLongTermFrameIdx++;
-    }
-  }
-  if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] >=
-      pCtx->pSps->iNumRefFrames) { //fail to remain one empty buffer in DPB
-    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "RemainOneBufferInDpbForEC(): empty one DPB failed for EC!");
-    iRet = ERR_INFO_REF_COUNT_OVERFLOW;
-  }
-
-  return iRet;
-}
-
-} // namespace WelsDec
+/*!
+ * \copy
+ *     Copyright (c)  2008-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *  manage_dec_ref.cpp
+ *
+ *  Abstract
+ *      Implementation for managing reference picture
+ *
+ *  History
+ *      07/21/2008 Created
+ *
+ *****************************************************************************/
+
+#include "manage_dec_ref.h"
+#include "error_concealment.h"
+#include "error_code.h"
+
+namespace WelsDec {
+
+static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum);
+static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameIdx);
+static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum);
+static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx);
+
+static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking);
+static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
+                            int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx);
+static int32_t SlidingWindow (PWelsDecoderContext pCtx);
+
+static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic);
+static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx, uint32_t uiLongTermPicNum);
+static int32_t MarkAsLongTerm (PRefPic pRefPic, int32_t iFrameNum, int32_t iLongTermFrameIdx,
+                               uint32_t uiLongTermPicNum);
+static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx);
+#ifdef LONG_TERM_REF
+int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum);
+#endif
+static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx);
+
+/**
+ * Clears the reference-related state of a picture so that it no longer counts
+ * as a short-term or long-term reference. A NULL picture is ignored.
+ * iFramePoc is not reset here.
+ */
+static void SetUnRef (PPicture pRef) {
+  if (NULL == pRef) {
+    return;
+  }
+
+  // Reference status flags.
+  pRef->bIsComplete = false;
+  pRef->bIsLongRef = false;
+  pRef->bUsedAsRef = false;
+
+  // Frame numbering and long-term bookkeeping.
+  pRef->iFrameNum = -1;
+  pRef->iFrameWrapNum = -1;
+  pRef->iLongTermFrameIdx = -1;
+  pRef->uiLongTermPicNum = 0;
+
+  // Layer identifiers and parameter set id.
+  pRef->uiSpatialId = -1;
+  pRef->uiTemporalId = -1;
+  pRef->uiQualityId = -1;
+  pRef->iSpsId = -1;
+}
+
+/**
+ * Empties the LIST_0 short-term and long-term reference lists and zeroes the
+ * reference counts of both LIST_0 and LIST_1. Every picture that was still
+ * held in one of the two lists is passed to SetUnRef().
+ *
+ * Per the original note, the reference lists are reset when
+ *   1. an SPS arrives, i.e. a new sequence starts;
+ *   2. an IDR NAL is handled, i.e. the first layer in an IDR AU.
+ */
+void WelsResetRefPic (PWelsDecoderContext pCtx) {
+  PRefPic pRefPic = &pCtx->sRefPic;
+
+  pRefPic->uiRefCount[LIST_0] = 0;
+  pRefPic->uiRefCount[LIST_1] = 0;
+
+  // Short-term list: release every slot of the array, not only the counted ones.
+  PPicture* ppShortList = pRefPic->pShortRefList[LIST_0];
+  for (int32_t iSlot = 0; iSlot < MAX_DPB_COUNT; ++iSlot) {
+    if (NULL == ppShortList[iSlot]) {
+      continue;
+    }
+    SetUnRef (ppShortList[iSlot]);
+    ppShortList[iSlot] = NULL;
+  }
+  pRefPic->uiShortRefCount[LIST_0] = 0;
+
+  // Long-term list: same treatment.
+  PPicture* ppLongList = pRefPic->pLongRefList[LIST_0];
+  for (int32_t iSlot = 0; iSlot < MAX_DPB_COUNT; ++iSlot) {
+    if (NULL == ppLongList[iSlot]) {
+      continue;
+    }
+    SetUnRef (ppLongList[iSlot]);
+    ppLongList[iSlot] = NULL;
+  }
+  pRefPic->uiLongRefCount[LIST_0] = 0;
+}
+
+/**
+ * Makes sure a non-intra slice has at least one reference picture.
+ *
+ * When both LIST_0 reference lists are empty, the slice is neither I nor SI
+ * and error concealment is enabled, a picture is fetched from the picture
+ * buffer and inserted as a short-term reference with frame_num 0 and POC 0.
+ * Its planes are either filled with the value 128 or, for the cross-IDR copy
+ * concealment modes, copied from the previously decoded picture when that
+ * picture exists and has the same dimensions.
+ *
+ * \return ERR_NONE, or ERR_INFO_REF_COUNT_OVERFLOW when no picture could be fetched
+ */
+static int32_t WelsCheckAndRecoverForFutureDecoding (PWelsDecoderContext pCtx) {
+  const bool kbNoReference = (pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] <= 0);
+  const bool kbIntraSlice = (pCtx->eSliceType == I_SLICE) || (pCtx->eSliceType == SI_SLICE);
+  if (!kbNoReference || kbIntraSlice) {
+    return ERR_NONE;
+  }
+  if (ERROR_CON_DISABLE == pCtx->pParam->eEcActiveIdc) {
+    return ERR_NONE;
+  }
+
+  //IDR lost!, recover it for future decoding
+  PPicture pRef = PrefetchPic (pCtx->pPicBuff);
+  if (NULL == pRef) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "WelsInitRefList()::PrefetchPic for EC errors.");
+    pCtx->iErrorCode |= dsOutOfMemory;
+    return ERR_INFO_REF_COUNT_OVERFLOW;
+  }
+
+  // The substitute picture is flagged incomplete because the real IDR was lost.
+  pRef->bIsComplete = false;
+  pRef->iSpsId = pCtx->pSps->iSpsId;
+  pRef->iPpsId = pCtx->pPps->iPpsId;
+  pCtx->iErrorCode |= dsDataErrorConcealed;
+
+  // Copying is only attempted in the cross-IDR modes, with an existing
+  // previous picture of identical size.
+  const bool kbCrossIdrCopyMode = (ERROR_CON_FRAME_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
+                                  || (ERROR_CON_SLICE_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
+                                  || (ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc)
+                                  || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR == pCtx->pParam->eEcActiveIdc)
+                                  || (ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE == pCtx->pParam->eEcActiveIdc);
+  PPicture pPrevPic = pCtx->pPreviousDecodedPictureInDpb;
+  const bool kbCopyPrevious = kbCrossIdrCopyMode && (NULL != pPrevPic)
+                              && (pRef->iWidthInPixel == pPrevPic->iWidthInPixel)
+                              && (pRef->iHeightInPixel == pPrevPic->iHeightInPixel);
+
+  if (kbCopyPrevious) {
+    if (pRef != pPrevPic) {
+      memcpy (pRef->pData[0], pPrevPic->pData[0], pRef->iLinesize[0] * pRef->iHeightInPixel);
+      memcpy (pRef->pData[1], pPrevPic->pData[1], pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
+      memcpy (pRef->pData[2], pPrevPic->pData[2], pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
+    } else {
+      // Source and destination are the same buffer: nothing is copied.
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsInitRefList()::EC memcpy overlap.");
+    }
+  } else {
+    memset (pRef->pData[0], 128, pRef->iLinesize[0] * pRef->iHeightInPixel);
+    memset (pRef->pData[1], 128, pRef->iLinesize[1] * pRef->iHeightInPixel / 2);
+    memset (pRef->pData[2], 128, pRef->iLinesize[2] * pRef->iHeightInPixel / 2);
+  }
+
+  pRef->iFrameNum = 0;
+  pRef->iFramePoc = 0;
+  pRef->uiTemporalId = pRef->uiQualityId = 0;
+  ExpandReferencingPicture (pRef->pData, pRef->iWidthInPixel, pRef->iHeightInPixel, pRef->iLinesize,
+                            pCtx->sExpandPicFunc.pfExpandLumaPicture, pCtx->sExpandPicFunc.pfExpandChromaPicture);
+  AddShortTermToList (&pCtx->sRefPic, pRef);
+  return ERR_NONE;
+}
+
+/**
+ * Recomputes iFrameWrapNum for every picture in the LIST_0 short-term list.
+ * A picture whose frame_num is larger than the frame_num of the current slice
+ * gets frame_num minus MaxFrameNum (1 << uiLog2MaxFrameNum); every other
+ * picture keeps its frame_num unchanged. NULL entries are skipped.
+ */
+static void WrapShortRefPicNum (PWelsDecoderContext pCtx) {
+  PSliceHeader pSliceHeader = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader;
+  const int32_t kiMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum;
+  const int32_t kiShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0];
+  PPicture* ppShortRefList = pCtx->sRefPic.pShortRefList[LIST_0];
+
+  for (int32_t iIdx = 0; iIdx < kiShortRefCount; ++iIdx) {
+    PPicture pPic = ppShortRefList[iIdx];
+    if (NULL == pPic) {
+      continue;
+    }
+    if (pPic->iFrameNum > pSliceHeader->iFrameNum) {
+      pPic->iFrameWrapNum = pPic->iFrameNum - kiMaxPicNum;
+    } else {
+      pPic->iFrameWrapNum = pPic->iFrameNum;
+    }
+  }
+}
+
+/**
+ * Builds the initial reference picture lists LIST_0 and LIST_1 for a B slice.
+ *
+ * The LIST_0 short-term references are split by comparing iFramePoc with iPoc:
+ *  - LIST_0: smaller POC (decreasing POC), then larger POC (increasing POC),
+ *            then the long-term references;
+ *  - LIST_1: larger POC (increasing POC), then smaller POC (decreasing POC),
+ *            then the long-term references.
+ * A short-term picture whose iFramePoc equals iPoc is put in neither list.
+ * The long-term list itself is re-sorted in place by increasing iFramePoc.
+ *
+ * Returns ERR_NONE, or the error reported by
+ * WelsCheckAndRecoverForFutureDecoding().
+ */
+int32_t WelsInitBSliceRefList (PWelsDecoderContext pCtx, int32_t iPoc) {
+
+  int32_t err = WelsCheckAndRecoverForFutureDecoding (pCtx); // may insert a concealment reference when no reference exists
+  if (err != ERR_NONE) return err;
+
+  WrapShortRefPicNum (pCtx); // refresh iFrameWrapNum of the short-term references
+
+  PPicture* ppShoreRefList = pCtx->sRefPic.pShortRefList[LIST_0];
+  PPicture* ppLongRefList = pCtx->sRefPic.pLongRefList[LIST_0];
+  memset (pCtx->sRefPic.pRefList[LIST_0], 0, MAX_DPB_COUNT * sizeof (PPicture));
+  memset (pCtx->sRefPic.pRefList[LIST_1], 0, MAX_DPB_COUNT * sizeof (PPicture));
+  int32_t iLSCurrPocCount = 0; // number of short-term refs with POC smaller than iPoc
+  int32_t iLTCurrPocCount = 0; // number of short-term refs with POC larger than iPoc
+  PPicture pLSCurrPocList0[MAX_DPB_COUNT]; // short-term refs with POC smaller than iPoc
+  PPicture pLTCurrPocList0[MAX_DPB_COUNT]; // short-term refs with POC larger than iPoc
+  for (int32_t i = 0; i < pCtx->sRefPic.uiShortRefCount[LIST_0]; ++i) {
+    if (ppShoreRefList[i]->iFramePoc < iPoc) {
+      pLSCurrPocList0[iLSCurrPocCount++] = ppShoreRefList[i];
+    }
+  }
+  for (int32_t i = pCtx->sRefPic.uiShortRefCount[LIST_0] - 1; i >= 0; --i) { // walked from the last entry to the first
+    if (ppShoreRefList[i]->iFramePoc > iPoc) {
+      pLTCurrPocList0[iLTCurrPocCount++] = ppShoreRefList[i];
+    }
+  }
+  if (pCtx->sRefPic.uiLongRefCount[LIST_0] > 1) {
+    //sort the long-term list in place by increasing iFramePoc
+    PPicture pTemp;
+    for (int32_t i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++i) {
+      for (int32_t j = i + 1; j < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++j) {
+        if (ppLongRefList[j]->iFramePoc < ppLongRefList[i]->iFramePoc) {
+          pTemp = ppLongRefList[i];
+          ppLongRefList[i] = ppLongRefList[j];
+          ppLongRefList[j] = pTemp;
+        }
+      }
+    }
+  }
+  int32_t iCurrPocCount = iLSCurrPocCount + iLTCurrPocCount; // total number of short-term entries placed in each list
+  int32_t iCount = 0;
+  //LIST_0
+  //short
+  //It may need to sort LIST_0 and LIST_1 so that they will have the right default orders.
+  for (int32_t i = 0; i < iLSCurrPocCount; ++i) {
+    pCtx->sRefPic.pRefList[LIST_0][iCount++] = pLSCurrPocList0[i];
+  }
+  if (iLSCurrPocCount > 1) {
+    //LIST_0: smaller-POC short-term entries, sorted by decreasing POC
+    PPicture pTemp;
+    for (int32_t i = 0; i < iLSCurrPocCount; ++i) {
+      for (int32_t j = i + 1; j < iLSCurrPocCount; ++j) {
+        if (pCtx->sRefPic.pRefList[LIST_0][j]->iFramePoc > pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc) {
+          pTemp = pCtx->sRefPic.pRefList[LIST_0][i];
+          pCtx->sRefPic.pRefList[LIST_0][i] = pCtx->sRefPic.pRefList[LIST_0][j];
+          pCtx->sRefPic.pRefList[LIST_0][j] = pTemp;
+        }
+      }
+    }
+  }
+  for (int32_t i = 0; i < iLTCurrPocCount; ++i) {
+    pCtx->sRefPic.pRefList[LIST_0][iCount++] = pLTCurrPocList0[i];
+  }
+  if (iLTCurrPocCount > 1) {
+    //LIST_0: larger-POC short-term entries (stored after the smaller-POC ones), sorted by increasing POC
+    PPicture pTemp;
+    for (int32_t i = iLSCurrPocCount; i < iCurrPocCount; ++i) {
+      for (int32_t j = i + 1; j < iCurrPocCount; ++j) {
+        if (pCtx->sRefPic.pRefList[LIST_0][j]->iFramePoc < pCtx->sRefPic.pRefList[LIST_0][i]->iFramePoc) {
+          pTemp = pCtx->sRefPic.pRefList[LIST_0][i];
+          pCtx->sRefPic.pRefList[LIST_0][i] = pCtx->sRefPic.pRefList[LIST_0][j];
+          pCtx->sRefPic.pRefList[LIST_0][j] = pTemp;
+        }
+      }
+    }
+  }
+  //long: appended after all short-term entries
+  for (int32_t i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++i) {
+    pCtx->sRefPic.pRefList[LIST_0][iCount++] = ppLongRefList[i];
+  }
+  pCtx->sRefPic.uiRefCount[LIST_0] = iCount;
+
+  iCount = 0;
+  //LIST_1
+  //short
+  for (int32_t i = 0; i < iLTCurrPocCount; ++i) {
+    pCtx->sRefPic.pRefList[LIST_1][iCount++] = pLTCurrPocList0[i];
+  }
+  if (iLTCurrPocCount > 1) {
+    //LIST_1: larger-POC short-term entries come first, sorted by increasing POC
+    PPicture pTemp;
+    for (int32_t i = 0; i < iLTCurrPocCount; ++i) {
+      for (int32_t j = i + 1; j < iLTCurrPocCount; ++j) {
+        if (pCtx->sRefPic.pRefList[LIST_1][j]->iFramePoc < pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc) {
+          pTemp = pCtx->sRefPic.pRefList[LIST_1][i];
+          pCtx->sRefPic.pRefList[LIST_1][i] = pCtx->sRefPic.pRefList[LIST_1][j];
+          pCtx->sRefPic.pRefList[LIST_1][j] = pTemp;
+        }
+      }
+    }
+  }
+  for (int32_t i = 0; i < iLSCurrPocCount; ++i) {
+    pCtx->sRefPic.pRefList[LIST_1][iCount++] = pLSCurrPocList0[i];
+  }
+  if (iLSCurrPocCount > 1) {
+    //LIST_1: smaller-POC short-term entries follow, sorted by decreasing POC
+    PPicture pTemp;
+    for (int32_t i = iLTCurrPocCount; i < iCurrPocCount; ++i) {
+      for (int32_t j = i + 1; j < iCurrPocCount; ++j) {
+        if (pCtx->sRefPic.pRefList[LIST_1][j]->iFramePoc > pCtx->sRefPic.pRefList[LIST_1][i]->iFramePoc) {
+          pTemp = pCtx->sRefPic.pRefList[LIST_1][i];
+          pCtx->sRefPic.pRefList[LIST_1][i] = pCtx->sRefPic.pRefList[LIST_1][j];
+          pCtx->sRefPic.pRefList[LIST_1][j] = pTemp;
+        }
+      }
+    }
+  }
+  //long: the same long-term entries as in LIST_0 (both read from the LIST_0 long-term list)
+  for (int32_t i = 0; i < pCtx->sRefPic.uiLongRefCount[LIST_0]; ++i) {
+    pCtx->sRefPic.pRefList[LIST_1][iCount++] = ppLongRefList[i];
+  }
+  pCtx->sRefPic.uiRefCount[LIST_1] = iCount;
+  return ERR_NONE;
+}
+
+/**
+ * Builds the initial reference picture list LIST_0: all short-term references
+ * in their stored order, followed by all long-term references in their stored
+ * order. The iPoc argument is not used by this function.
+ *
+ * \return ERR_NONE, or the error reported by WelsCheckAndRecoverForFutureDecoding()
+ */
+int32_t WelsInitRefList (PWelsDecoderContext pCtx, int32_t iPoc) {
+  const int32_t kiRecoverRet = WelsCheckAndRecoverForFutureDecoding (pCtx);
+  if (ERR_NONE != kiRecoverRet) {
+    return kiRecoverRet;
+  }
+
+  WrapShortRefPicNum (pCtx);
+
+  PRefPic pRefPic = &pCtx->sRefPic;
+  PPicture* ppDstList = pRefPic->pRefList[LIST_0];
+  memset (ppDstList, 0, MAX_DPB_COUNT * sizeof (PPicture));
+
+  int32_t iFilled = 0;
+
+  // Short-term references first.
+  PPicture* ppShortList = pRefPic->pShortRefList[LIST_0];
+  for (int32_t iSrc = 0; iSrc < pRefPic->uiShortRefCount[LIST_0]; ++iSrc) {
+    ppDstList[iFilled] = ppShortList[iSrc];
+    ++iFilled;
+  }
+
+  // Long-term references afterwards.
+  PPicture* ppLongList = pRefPic->pLongRefList[LIST_0];
+  for (int32_t iSrc = 0; iSrc < pRefPic->uiLongRefCount[LIST_0]; ++iSrc) {
+    ppDstList[iFilled] = ppLongList[iSrc];
+    ++iFilled;
+  }
+
+  pRefPic->uiRefCount[LIST_0] = iFilled;
+  return ERR_NONE;
+}
+/**
+ * Applies the parsed reference picture list reordering commands of the
+ * current slice to sRefPic.pRefList (LIST_0, plus LIST_1 for B slices).
+ *
+ * For each command the addressed picture is searched in the list (by frame_num
+ * for short-term commands, by iLongTermFrameIdx for the long-term command) and
+ * moved to position iReorderingIndex; the entries in between are shifted.
+ *
+ * Returns ERR_NONE for I and SI slices and on success. Returns
+ * ERR_INFO_REFERENCE_PIC_LOST when a list is empty, when the addressed picture
+ * is not found, or when the found picture has the same quality id but a
+ * different SPS id than the current slice.
+ */
+int32_t WelsReorderRefList (PWelsDecoderContext pCtx) {
+
+  if (pCtx->eSliceType == I_SLICE || pCtx->eSliceType == SI_SLICE) {
+    return ERR_NONE;
+  }
+
+  PRefPicListReorderSyn pRefPicListReorderSyn = pCtx->pCurDqLayer->pRefPicListReordering;
+  PNalUnitHeaderExt pNalHeaderExt = &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt;
+  PSliceHeader pSliceHeader = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader;
+  int32_t ListCount = 1;
+  if (pCtx->eSliceType == B_SLICE) ListCount = 2; // B slices also reorder LIST_1
+  for (int32_t listIdx = 0; listIdx < ListCount; ++listIdx) {
+    PPicture pPic = NULL;
+    PPicture* ppRefList = pCtx->sRefPic.pRefList[listIdx];
+    int32_t iMaxRefIdx = pCtx->pSps->iNumRefFrames; // upper bound for both the command count and the search range
+    int32_t iRefCount = pCtx->sRefPic.uiRefCount[listIdx];
+    int32_t iPredFrameNum = pSliceHeader->iFrameNum; // prediction starts at the frame_num of the current slice
+    int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum;
+    int32_t iAbsDiffPicNum = -1;
+    int32_t iReorderingIndex = 0; // index of the current command and destination slot in the list
+    int32_t i = 0;
+
+    if (iRefCount <= 0) {
+      pCtx->iErrorCode = dsNoParamSets; //No any reference for decoding, SHOULD request IDR
+      return ERR_INFO_REFERENCE_PIC_LOST;
+    }
+
+    if (pRefPicListReorderSyn->bRefPicListReorderingFlag[listIdx]) {
+      while ((iReorderingIndex < iMaxRefIdx)
+             && (pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiReorderingOfPicNumsIdc != 3)) { // idc 3 ends the command list
+        uint16_t uiReorderingOfPicNumsIdc =
+          pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiReorderingOfPicNumsIdc;
+        if (uiReorderingOfPicNumsIdc < 2) { // idc 0 or 1: short-term picture addressed by a frame_num difference
+          iAbsDiffPicNum = pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiAbsDiffPicNumMinus1 + 1;
+
+          if (uiReorderingOfPicNumsIdc == 0) {
+            iPredFrameNum -= iAbsDiffPicNum;
+          } else {
+            iPredFrameNum += iAbsDiffPicNum;
+          }
+          iPredFrameNum &= iMaxPicNum - 1; // iMaxPicNum is a power of two, so this wraps into [0, iMaxPicNum)
+
+          for (i = iMaxRefIdx - 1; i >= 0; i--) { // search from the back; i ends at -1 when nothing matches
+            if (ppRefList[i] != NULL && ppRefList[i]->iFrameNum == iPredFrameNum && !ppRefList[i]->bIsLongRef) {
+              if ((pNalHeaderExt->uiQualityId == ppRefList[i]->uiQualityId)
+                  && (pSliceHeader->iSpsId != ppRefList[i]->iSpsId)) {   //check;
+                WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsReorderRefList()::::BASE LAYER::::iSpsId:%d, ref_sps_id:%d",
+                         pSliceHeader->iSpsId, ppRefList[i]->iSpsId);
+                pCtx->iErrorCode = dsNoParamSets; //cross-IDR reference frame selection, SHOULD request IDR.--
+                return ERR_INFO_REFERENCE_PIC_LOST;
+              } else {
+                break;
+              }
+            }
+          }
+
+        } else if (uiReorderingOfPicNumsIdc == 2) { // idc 2: long-term picture, matched against iLongTermFrameIdx
+          for (i = iMaxRefIdx - 1; i >= 0; i--) {
+            if (ppRefList[i] != NULL && ppRefList[i]->bIsLongRef
+                && ppRefList[i]->iLongTermFrameIdx ==
+                pRefPicListReorderSyn->sReorderingSyn[listIdx][iReorderingIndex].uiLongTermPicNum) {
+              if ((pNalHeaderExt->uiQualityId == ppRefList[i]->uiQualityId)
+                  && (pSliceHeader->iSpsId != ppRefList[i]->iSpsId)) {    //check;
+                WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "WelsReorderRefList()::::BASE LAYER::::iSpsId:%d, ref_sps_id:%d",
+                         pSliceHeader->iSpsId, ppRefList[i]->iSpsId);
+                pCtx->iErrorCode = dsNoParamSets; //cross-IDR reference frame selection, SHOULD request IDR.--
+                return ERR_INFO_REFERENCE_PIC_LOST;
+              } else {
+                break;
+              }
+            }
+          }
+        }
+        // NOTE(review): for an idc value other than 0, 1, 2 or 3 no search runs and i keeps its previous value - confirm the parser rejects such values
+        if (i < 0) { // the search ran off the front of the list: the addressed picture is missing
+          return ERR_INFO_REFERENCE_PIC_LOST;
+        }
+        pPic = ppRefList[i];
+        if (i > iReorderingIndex) { // found behind the destination: shift [iReorderingIndex, i) up by one, overwriting slot i
+          memmove (&ppRefList[1 + iReorderingIndex], &ppRefList[iReorderingIndex],
+                   (i - iReorderingIndex) * sizeof (PPicture)); //confirmed_safe_unsafe_usage
+        } else if (i < iReorderingIndex) { // found in front of the destination
+          // NOTE(review): this shifts iMaxRefIdx - iReorderingIndex entries up by one and so writes ppRefList[iMaxRefIdx] - confirm the array has room for that slot
+          memmove (&ppRefList[1 + iReorderingIndex], &ppRefList[iReorderingIndex],
+                   (iMaxRefIdx - iReorderingIndex) * sizeof (PPicture));
+        }
+        ppRefList[iReorderingIndex] = pPic;
+        iReorderingIndex++;
+      }
+    }
+  }
+  return ERR_NONE;
+}
+
+/**
+ * Alternative implementation of reference picture list reordering
+ * (WelsReorderRefList2 is the test code, per the original note).
+ *
+ * For every list of the current slice (LIST_0, plus LIST_1 for B slices) the
+ * parsed reordering commands are applied one by one: the list is shifted up
+ * to open a slot at position iCount, the addressed short-term picture (matched
+ * by iFrameWrapNum) or long-term picture (matched by uiLongTermPicNum) is
+ * stored there, and a later duplicate of that picture is squeezed out of the
+ * remainder of the list. Afterwards the list is padded with its last valid
+ * entry up to the uiRefCount of the slice header, and sRefPic.uiRefCount is
+ * updated.
+ *
+ * I and SI slices are left untouched.
+ *
+ * \param pCtx decoder context holding the slice header, the reordering syntax
+ *             and the reference lists
+ * \return ERR_NONE in all cases
+ */
+int32_t WelsReorderRefList2 (PWelsDecoderContext pCtx) {
+
+  if (pCtx->eSliceType == I_SLICE || pCtx->eSliceType == SI_SLICE) {
+    return ERR_NONE;
+  }
+
+  PRefPicListReorderSyn pRefPicListReorderSyn = pCtx->pCurDqLayer->pRefPicListReordering;
+  PSliceHeader pSliceHeader = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader;
+
+  PPicture* ppShoreRefList = pCtx->sRefPic.pShortRefList[LIST_0];
+  int32_t iShortRefCount = pCtx->sRefPic.uiShortRefCount[LIST_0];
+  PPicture* ppLongRefList = pCtx->sRefPic.pLongRefList[LIST_0];
+  int32_t iLongRefCount = pCtx->sRefPic.uiLongRefCount[LIST_0];
+  int32_t i = 0;
+  int32_t j = 0;
+  int32_t k = 0;
+  int32_t iMaxRefIdx = pCtx->pSps->iNumRefFrames;
+  const int32_t iCurFrameNum = pSliceHeader->iFrameNum;
+  const int32_t iMaxPicNum = 1 << pSliceHeader->pSps->uiLog2MaxFrameNum;
+  int32_t iListCount = 1;
+  if (pCtx->eSliceType == B_SLICE) iListCount = 2;
+  for (int32_t listIdx = 0; listIdx < iListCount; ++listIdx) {
+    PPicture* ppRefList = pCtx->sRefPic.pRefList[listIdx];
+    int32_t iCount = 0; // number of entries placed by reordering commands so far
+    int32_t iRefCount = pSliceHeader->uiRefCount[listIdx];
+    int32_t iAbsDiffPicNum = -1;
+
+    if (pRefPicListReorderSyn->bRefPicListReorderingFlag[listIdx]) {
+      int32_t iPredFrameNum = iCurFrameNum;
+      // NOTE(review): the only bound on i is the iCount check below; iCount does
+      // not grow when the addressed picture is not found, so confirm the parser
+      // always terminates the command array with idc 3.
+      for (i = 0; pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiReorderingOfPicNumsIdc != 3; i++) {
+        if (iCount >= iMaxRefIdx)
+          break;
+
+        // Open a slot at position iCount by shifting the tail up by one.
+        for (j = iRefCount; j > iCount; j--)
+          ppRefList[j] = ppRefList[j - 1];
+
+        uint16_t uiReorderingOfPicNumsIdc =
+          pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiReorderingOfPicNumsIdc;
+
+        if (uiReorderingOfPicNumsIdc < 2) { // reorder short references
+          iAbsDiffPicNum = (int32_t) (pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiAbsDiffPicNumMinus1 + 1);
+          if (uiReorderingOfPicNumsIdc == 0) {
+            if (iPredFrameNum - iAbsDiffPicNum < 0)
+              iPredFrameNum -= (iAbsDiffPicNum - iMaxPicNum);
+            else
+              iPredFrameNum -= iAbsDiffPicNum;
+          } else {
+            if (iPredFrameNum + iAbsDiffPicNum >= iMaxPicNum)
+              iPredFrameNum += (iAbsDiffPicNum - iMaxPicNum);
+            else
+              iPredFrameNum += iAbsDiffPicNum;
+          }
+
+          // Bring the predicted value into the same wrapped domain as iFrameWrapNum.
+          if (iPredFrameNum > iCurFrameNum) {
+            iPredFrameNum -= iMaxPicNum;
+          }
+
+          for (j = 0; j < iShortRefCount; j++) {
+            if (ppShoreRefList[j]) {
+              if (ppShoreRefList[j]->iFrameWrapNum == iPredFrameNum) {
+                ppRefList[iCount++] = ppShoreRefList[j];
+                break;
+              }
+            }
+          }
+          // Compact the tail, dropping the duplicate of the picture just placed.
+          k = iCount;
+          for (j = k; j <= iRefCount; j++) {
+            if (ppRefList[j] != NULL) {
+              if (ppRefList[j]->bIsLongRef || ppRefList[j]->iFrameWrapNum != iPredFrameNum)
+                ppRefList[k++] = ppRefList[j];
+            }
+          }
+        } else { // reorder long term references uiReorderingOfPicNumsIdc == 2
+          iPredFrameNum = pRefPicListReorderSyn->sReorderingSyn[listIdx][i].uiLongTermPicNum;
+          for (j = 0; j < iLongRefCount; j++) {
+            if (ppLongRefList[j] != NULL) {
+              if (ppLongRefList[j]->uiLongTermPicNum == (uint32_t)iPredFrameNum) {
+                ppRefList[iCount++] = ppLongRefList[j];
+                break;
+              }
+            }
+          }
+          // Compact the tail, dropping the duplicate of the picture just placed.
+          // The test must read the entry of ppRefList being examined: j indexes
+          // ppRefList here, not ppLongRefList, so looking at ppLongRefList[j]
+          // could compare the wrong picture or dereference an empty slot.
+          k = iCount;
+          for (j = k; j <= iRefCount; j++) {
+            if (ppRefList[j] != NULL) {
+              if (!ppRefList[j]->bIsLongRef || ppRefList[j]->uiLongTermPicNum != (uint32_t)iPredFrameNum)
+                ppRefList[k++] = ppRefList[j];
+            }
+          }
+        }
+      }
+    }
+
+    // Pad the list with its last valid entry up to the reference count of the slice.
+    for (i = WELS_MAX (1, WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx])); i < iRefCount; i++)
+      ppRefList[i] = ppRefList[i - 1];
+    pCtx->sRefPic.uiRefCount[listIdx] = (uint8_t)WELS_MIN (WELS_MAX (iCount, pCtx->sRefPic.uiRefCount[listIdx]), iRefCount);
+  }
+  return ERR_NONE;
+}
+/**
+ * Marks the current picture pCtx->pDec as a reference picture.
+ *
+ * The layer ids and parameter set ids of the current layer are copied into
+ * the picture first. Then:
+ *  - if the access unit contains an IDR NAL, the picture becomes long-term
+ *    reference 0 when bLongTermRefFlag is set, otherwise iMaxLongTermFrameIdx
+ *    is set to -1;
+ *  - otherwise the MMCO commands are executed when
+ *    bAdaptiveRefPicMarkingModeFlag is set, else the sliding window is applied.
+ *    When either fails and error concealment is enabled,
+ *    RemainOneBufferInDpbForEC() is tried instead of returning the error.
+ * Finally a picture that did not become a long-term reference is added to the
+ * short-term list, after making room when the lists are already full.
+ *
+ * Returns ERR_NONE or the error code of the failing step.
+ */
+int32_t WelsMarkAsRef (PWelsDecoderContext pCtx) {
+  PRefPic pRefPic = &pCtx->sRefPic;
+  PRefPicMarking pRefPicMarking = pCtx->pCurDqLayer->pRefPicMarking;
+  PAccessUnit pCurAU = pCtx->pAccessUnitList;
+  bool bIsIDRAU = false;
+  uint32_t j;
+
+  int32_t iRet = ERR_NONE;
+
+  pCtx->pDec->uiQualityId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiQualityId;
+  pCtx->pDec->uiTemporalId = pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt.uiTemporalId;
+  pCtx->pDec->iSpsId = pCtx->pSps->iSpsId;
+  pCtx->pDec->iPpsId = pCtx->pPps->iPpsId;
+
+  for (j = pCurAU->uiStartPos; j <= pCurAU->uiEndPos; j++) { // one IDR NAL is enough to treat the whole AU as IDR
+    if (pCurAU->pNalUnitsList[j]->sNalHeaderExt.sNalUnitHeader.eNalUnitType == NAL_UNIT_CODED_SLICE_IDR
+        || pCurAU->pNalUnitsList[j]->sNalHeaderExt.bIdrFlag) {
+      bIsIDRAU = true;
+      break;
+    }
+  }
+  if (bIsIDRAU) {
+    if (pRefPicMarking->bLongTermRefFlag) {
+      pCtx->sRefPic.iMaxLongTermFrameIdx = 0;
+      AddLongTermToList (pRefPic, pCtx->pDec, 0, 0); // long-term frame index 0, long-term pic num 0
+    } else {
+      pCtx->sRefPic.iMaxLongTermFrameIdx = -1;
+    }
+  } else {
+    if (pRefPicMarking->bAdaptiveRefPicMarkingModeFlag) {
+      iRet = MMCO (pCtx, pRefPicMarking);
+      if (iRet != ERR_NONE) {
+        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { // with error concealment enabled, try to free a slot instead of failing
+          iRet = RemainOneBufferInDpbForEC (pCtx);
+          WELS_VERIFY_RETURN_IF (iRet, iRet); // presumably returns iRet when it is non-zero; macro defined elsewhere - TODO confirm
+        } else {
+          return iRet;
+        }
+      }
+
+      if (pCtx->bLastHasMmco5) { // set by MMCOProcess when an MMCO_RESET command was executed
+        pCtx->pDec->iFrameNum = 0;
+        pCtx->pDec->iFramePoc = 0;
+      }
+
+    } else {
+      iRet = SlidingWindow (pCtx);
+      if (iRet != ERR_NONE) {
+        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+          iRet = RemainOneBufferInDpbForEC (pCtx);
+          WELS_VERIFY_RETURN_IF (iRet, iRet);
+        } else {
+          return iRet;
+        }
+      }
+    }
+  }
+
+  if (!pCtx->pDec->bIsLongRef) { // not stored as long-term above, so it goes to the short-term list
+    if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) { // lists already full
+      if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+        iRet = RemainOneBufferInDpbForEC (pCtx);
+        WELS_VERIFY_RETURN_IF (iRet, iRet);
+      } else {
+        return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
+      }
+    }
+    iRet = AddShortTermToList (pRefPic, pCtx->pDec);
+  }
+
+  return iRet;
+}
+
+/**
+ * Executes the memory management control operations stored in
+ * pRefPicMarking->sMmcoRef, in order, until an MMCO_END entry is reached.
+ *
+ * \return ERR_NONE on success;
+ *         ERR_INFO_INVALID_MMCO_OPCODE_BASE for an opcode above MMCO_LONG;
+ *         the error of MMCOProcess() when an operation fails;
+ *         ERR_INFO_INVALID_MMCO_NUM when MAX_MMCO_COUNT entries were processed
+ *         without meeting MMCO_END.
+ */
+static int32_t MMCO (PWelsDecoderContext pCtx, PRefPicMarking pRefPicMarking) {
+  PSps pSps = pCtx->pCurDqLayer->sLayerInfo.pSps;
+  int32_t iOpIdx = 0;
+
+  while (iOpIdx < MAX_MMCO_COUNT && pRefPicMarking->sMmcoRef[iOpIdx].uiMmcoType != MMCO_END) {
+    const uint32_t kuiMmcoType = pRefPicMarking->sMmcoRef[iOpIdx].uiMmcoType;
+    // frame_num of the addressed short-term picture, wrapped to the frame_num range.
+    const int32_t kiShortFrameNum = (pCtx->iFrameNum - pRefPicMarking->sMmcoRef[iOpIdx].iDiffOfPicNum) & ((
+                                      1 << pSps->uiLog2MaxFrameNum) - 1);
+    const uint32_t kuiLongTermPicNum = pRefPicMarking->sMmcoRef[iOpIdx].uiLongTermPicNum;
+    const int32_t kiLongTermFrameIdx = pRefPicMarking->sMmcoRef[iOpIdx].iLongTermFrameIdx;
+    const int32_t kiMaxLongTermFrameIdx = pRefPicMarking->sMmcoRef[iOpIdx].iMaxLongTermFrameIdx;
+
+    if (kuiMmcoType > MMCO_LONG) {
+      return ERR_INFO_INVALID_MMCO_OPCODE_BASE;
+    }
+
+    const int32_t kiOpRet = MMCOProcess (pCtx, kuiMmcoType, kiShortFrameNum, kuiLongTermPicNum, kiLongTermFrameIdx,
+                                         kiMaxLongTermFrameIdx);
+    if (ERR_NONE != kiOpRet) {
+      return kiOpRet;
+    }
+    ++iOpIdx;
+  }
+
+  //although Rec does not handle this condition, we here prohibit too many MMCO op
+  if (MAX_MMCO_COUNT == iOpIdx) {
+    return ERR_INFO_INVALID_MMCO_NUM;
+  }
+  return ERR_NONE;
+}
+/**
+ * Executes a single memory management control operation.
+ *
+ * \param pCtx                 decoder context owning the reference lists
+ * \param uiMmcoType           operation code (MMCO_SHORT2UNUSED ... MMCO_LONG)
+ * \param iShortFrameNum       frame_num of the addressed short-term picture
+ * \param uiLongTermPicNum     long-term picture number carried by the command
+ * \param iLongTermFrameIdx    long-term frame index carried by the command
+ * \param iMaxLongTermFrameIdx new maximum long-term frame index (MMCO_SET_MAX_LONG)
+ * \return ERR_NONE on success;
+ *         ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX when iLongTermFrameIdx
+ *         exceeds the current maximum;
+ *         ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW when MMCO_LONG finds the
+ *         reference lists full;
+ *         otherwise the result of AddLongTermToList() for MMCO_LONG.
+ *         Unknown operation codes are ignored.
+ */
+static int32_t MMCOProcess (PWelsDecoderContext pCtx, uint32_t uiMmcoType,
+                            int32_t iShortFrameNum, uint32_t uiLongTermPicNum, int32_t iLongTermFrameIdx, int32_t iMaxLongTermFrameIdx) {
+  PRefPic pRefPic = &pCtx->sRefPic;
+  PPicture pPic = NULL;
+  int32_t i = 0;
+  int32_t iRet = ERR_NONE;
+
+  switch (uiMmcoType) {
+  case MMCO_SHORT2UNUSED:
+    pPic = WelsDelShortFromListSetUnref (pRefPic, iShortFrameNum);
+    if (pPic == NULL) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_SHORT2UNUSED: delete an empty entry from short term list");
+    }
+    break;
+  case MMCO_LONG2UNUSED:
+    pPic = WelsDelLongFromListSetUnref (pRefPic, uiLongTermPicNum);
+    if (pPic == NULL) {
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_LONG2UNUSED: delete an empty entry from long term list");
+    }
+    break;
+  case MMCO_SHORT2LONG:
+    if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
+      return ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX;
+    }
+    pPic = WelsDelShortFromList (pRefPic, iShortFrameNum);
+    if (pPic == NULL) {
+      // The warning names the operation actually being executed.
+      WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "MMCO_SHORT2LONG: delete an empty entry from short term list");
+      break;
+    }
+    // Free the long-term slot that the converted picture is about to take.
+    WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx);
+#ifdef LONG_TERM_REF
+    pCtx->bCurAuContainLtrMarkSeFlag = true;
+    pCtx->iFrameNumOfAuMarkedLtr      = iShortFrameNum;
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "ex_mark_avc():::MMCO_SHORT2LONG:::LTR marking....iFrameNum: %d",
+             pCtx->iFrameNumOfAuMarkedLtr);
+#endif
+
+    MarkAsLongTerm (pRefPic, iShortFrameNum, iLongTermFrameIdx, uiLongTermPicNum);
+    break;
+  case MMCO_SET_MAX_LONG:
+    pRefPic->iMaxLongTermFrameIdx = iMaxLongTermFrameIdx;
+    // Walk the list from the back: deleting entry i moves only entries behind
+    // it, which were already examined. A forward walk would skip the entry
+    // that slides into slot i after each deletion.
+    for (i = pRefPic->uiLongRefCount[LIST_0] - 1; i >= 0; i--) {
+      if (pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
+        WelsDelLongFromListSetUnref (pRefPic, pRefPic->pLongRefList[LIST_0][i]->iLongTermFrameIdx);
+      }
+    }
+    break;
+  case MMCO_RESET:
+    WelsResetRefPic (pCtx);
+    pCtx->bLastHasMmco5 = true;
+    break;
+  case MMCO_LONG:
+    if (iLongTermFrameIdx > pRefPic->iMaxLongTermFrameIdx) {
+      return ERR_INFO_INVALID_MMCO_LONG_TERM_IDX_EXCEED_MAX;
+    }
+    // Free the long-term slot first, then make sure there is room for the current picture.
+    WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx);
+    if (pRefPic->uiLongRefCount[LIST_0] + pRefPic->uiShortRefCount[LIST_0] >= WELS_MAX (1, pCtx->pSps->iNumRefFrames)) {
+      return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
+    }
+#ifdef LONG_TERM_REF
+    pCtx->bCurAuContainLtrMarkSeFlag = true;
+    pCtx->iFrameNumOfAuMarkedLtr      = pCtx->iFrameNum;
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO, "ex_mark_avc():::MMCO_LONG:::LTR marking....iFrameNum: %d",
+             pCtx->iFrameNum);
+#endif
+    iRet = AddLongTermToList (pRefPic, pCtx->pDec, iLongTermFrameIdx, uiLongTermPicNum);
+    break;
+  default :
+    break;
+  }
+
+  return iRet;
+}
+
+/**
+ * Sliding window reference marking: when the LIST_0 short-term and long-term
+ * lists together hold at least iNumRefFrames pictures, the last entry of the
+ * short-term list is removed and marked as unused for reference. New pictures
+ * are inserted at the front of that list, so the last entry is the one that
+ * was added earliest.
+ *
+ * \return ERR_NONE on success or when nothing had to be removed;
+ *         ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH when the lists are full but
+ *         the short-term list is empty;
+ *         ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW when the removal fails.
+ */
+static int32_t SlidingWindow (PWelsDecoderContext pCtx) {
+  PRefPic pRefPic = &pCtx->sRefPic;
+
+  // Nothing to evict while a reference slot is still free.
+  if (pCtx->sRefPic.uiShortRefCount[LIST_0] + pCtx->sRefPic.uiLongRefCount[LIST_0] < pCtx->pSps->iNumRefFrames) {
+    return ERR_NONE;
+  }
+
+  if (0 == pCtx->sRefPic.uiShortRefCount[LIST_0]) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "No reference picture in short term list when sliding window");
+    return ERR_INFO_INVALID_MMCO_REF_NUM_NOT_ENOUGH;
+  }
+
+  const int32_t kiLastIdx = pRefPic->uiShortRefCount[LIST_0] - 1;
+  PPicture pEvicted = WelsDelShortFromList (pRefPic, pRefPic->pShortRefList[LIST_0][kiLastIdx]->iFrameNum);
+  if (NULL == pEvicted) {
+    return ERR_INFO_INVALID_MMCO_REF_NUM_OVERFLOW;
+  }
+  SetUnRef (pEvicted);
+  return ERR_NONE;
+}
+
+/**
+ * Removes the first picture with the given frame_num from the LIST_0
+ * short-term list, closes the gap and decrements the list count.
+ * Only bUsedAsRef is cleared on the removed picture.
+ *
+ * \return the removed picture, or NULL when no entry has that frame_num
+ */
+static PPicture WelsDelShortFromList (PRefPic pRefPic, int32_t iFrameNum) {
+  PPicture* ppShortList = pRefPic->pShortRefList[LIST_0];
+  const int32_t kiCount = pRefPic->uiShortRefCount[LIST_0];
+
+  // Locate the first matching entry.
+  int32_t iHit = 0;
+  while (iHit < kiCount && ppShortList[iHit]->iFrameNum != iFrameNum) {
+    ++iHit;
+  }
+  if (iHit >= kiCount) {
+    return NULL;
+  }
+
+  PPicture pRemoved = ppShortList[iHit];
+  pRemoved->bUsedAsRef = false;
+
+  // Pull the entries behind the hit one slot forward.
+  const int32_t kiTailSize = kiCount - iHit - 1;
+  if (kiTailSize > 0) {
+    memmove (&ppShortList[iHit], &ppShortList[iHit + 1],
+             kiTailSize * sizeof (PPicture)); //confirmed_safe_unsafe_usage
+  }
+
+  // Shrink the list and clear the slot that became free at its end.
+  pRefPic->uiShortRefCount[LIST_0]--;
+  ppShortList[pRefPic->uiShortRefCount[LIST_0]] = NULL;
+  return pRemoved;
+}
+
+/**
+ * Removes the picture with the given frame_num from the short-term list and
+ * resets its reference state.
+ *
+ * \return the removed picture, or NULL when no entry has that frame_num
+ */
+static PPicture WelsDelShortFromListSetUnref (PRefPic pRefPic, int32_t iFrameNum) {
+  PPicture pRemoved = WelsDelShortFromList (pRefPic, iFrameNum);
+  SetUnRef (pRemoved); // SetUnRef() ignores a NULL picture
+  return pRemoved;
+}
+
+/**
+ * Removes the first picture with the given long-term frame index from the
+ * LIST_0 long-term list, closes the gap and decrements the list count.
+ * bUsedAsRef and bIsLongRef are cleared on the removed picture.
+ *
+ * \return the removed picture, or NULL when no entry has that index
+ */
+static PPicture WelsDelLongFromList (PRefPic pRefPic, uint32_t uiLongTermFrameIdx) {
+  PPicture* ppLongList = pRefPic->pLongRefList[LIST_0];
+  const int32_t kiCount = pRefPic->uiLongRefCount[LIST_0];
+
+  // Locate the first matching entry.
+  int32_t iHit = 0;
+  while (iHit < kiCount && ppLongList[iHit]->iLongTermFrameIdx != (int32_t)uiLongTermFrameIdx) {
+    ++iHit;
+  }
+  if (iHit >= kiCount) {
+    return NULL;
+  }
+
+  PPicture pRemoved = ppLongList[iHit];
+  pRemoved->bUsedAsRef = false;
+  pRemoved->bIsLongRef = false;
+
+  // Pull the entries behind the hit one slot forward.
+  const int32_t kiTailSize = kiCount - iHit - 1;
+  if (kiTailSize > 0) {
+    memmove (&ppLongList[iHit], &ppLongList[iHit + 1],
+             kiTailSize * sizeof (PPicture)); //confirmed_safe_unsafe_usage
+  }
+
+  // Shrink the list and clear the slot that became free at its end.
+  pRefPic->uiLongRefCount[LIST_0]--;
+  ppLongList[pRefPic->uiLongRefCount[LIST_0]] = NULL;
+  return pRemoved;
+}
+
+/**
+ * Removes the picture with the given long-term frame index from the long-term
+ * list and resets its reference state.
+ *
+ * \return the removed picture, or NULL when no entry has that index
+ */
+static PPicture WelsDelLongFromListSetUnref (PRefPic pRefPic, uint32_t uiLongTermFrameIdx) {
+  PPicture pRemoved = WelsDelLongFromList (pRefPic, uiLongTermFrameIdx);
+  SetUnRef (pRemoved); // SetUnRef() ignores a NULL picture
+  return pRemoved;
+}
+
+/**
+ * Marks pPic as a short-term reference and inserts it at the front of the
+ * LIST_0 short-term list.
+ *
+ * When the list already holds a picture with the same frame_num, that entry
+ * is replaced by pPic in place, the list count stays unchanged and
+ * ERR_INFO_DUPLICATE_FRAME_NUM is returned.
+ *
+ * \return ERR_NONE, or ERR_INFO_DUPLICATE_FRAME_NUM as described above
+ */
+static int32_t AddShortTermToList (PRefPic pRefPic, PPicture pPic) {
+  pPic->iLongTermFrameIdx = -1;
+  pPic->bIsLongRef = false;
+  pPic->bUsedAsRef = true;
+
+  PPicture* ppShortList = pRefPic->pShortRefList[LIST_0];
+  const int32_t kiCount = pRefPic->uiShortRefCount[LIST_0];
+
+  // Check the duplicate frame_num in short ref list
+  for (int32_t iPos = 0; iPos < kiCount; ++iPos) {
+    if (ppShortList[iPos]->iFrameNum != pPic->iFrameNum) {
+      continue;
+    }
+    // Replace the previous ref pic with the new one with the same frame_num
+    ppShortList[iPos] = pPic;
+    return ERR_INFO_DUPLICATE_FRAME_NUM;
+  }
+
+  // Shift the existing entries back by one to free slot 0.
+  if (kiCount > 0) {
+    memmove (&ppShortList[1], &ppShortList[0],
+             kiCount * sizeof (PPicture)); //confirmed_safe_unsafe_usage
+  }
+  ppShortList[0] = pPic;
+  pRefPic->uiShortRefCount[LIST_0]++;
+  return ERR_NONE;
+}
+
+/**
+ * Marks pPic as a long-term reference with the given frame index and picture
+ * number, and inserts it into the LIST_0 long-term list in front of the first
+ * entry whose iLongTermFrameIdx is larger (at the end when there is none).
+ *
+ * \return always ERR_NONE
+ */
+static int32_t AddLongTermToList (PRefPic pRefPic, PPicture pPic, int32_t iLongTermFrameIdx,
+                                  uint32_t uiLongTermPicNum) {
+  pPic->bUsedAsRef = true;
+  pPic->bIsLongRef = true;
+  pPic->iLongTermFrameIdx = iLongTermFrameIdx;
+  pPic->uiLongTermPicNum = uiLongTermPicNum;
+
+  PPicture* ppLongList = pRefPic->pLongRefList[LIST_0];
+  const int32_t kiCount = pRefPic->uiLongRefCount[LIST_0];
+
+  // Skip every entry whose frame index is not larger than the new one.
+  int32_t iInsertPos = 0;
+  while (iInsertPos < kiCount && ppLongList[iInsertPos]->iLongTermFrameIdx <= pPic->iLongTermFrameIdx) {
+    ++iInsertPos;
+  }
+
+  // Make room at the insertion point; nothing moves when inserting at the end.
+  if (kiCount > iInsertPos) {
+    memmove (&ppLongList[iInsertPos + 1], &ppLongList[iInsertPos],
+             (kiCount - iInsertPos) * sizeof (PPicture)); //confirmed_safe_unsafe_usage
+  }
+  ppLongList[iInsertPos] = pPic;
+
+  pRefPic->uiLongRefCount[LIST_0]++;
+  return ERR_NONE;
+}
+
+/**
+ * Turns the picture with the given frame_num into a long-term reference.
+ *
+ * Any picture currently using iLongTermFrameIdx is removed from the long-term
+ * list first. The picture is then looked up in the active reference list
+ * pRefList[LIST_0]; the first entry that has the frame_num and is not yet a
+ * long-term reference is added to the long-term list.
+ *
+ * \return the result of AddLongTermToList(), or ERR_NONE when no entry matched
+ */
+static int32_t MarkAsLongTerm (PRefPic pRefPic, int32_t iFrameNum, int32_t iLongTermFrameIdx,
+                               uint32_t uiLongTermPicNum) {
+  WelsDelLongFromListSetUnref (pRefPic, iLongTermFrameIdx);
+
+  const int32_t kiRefCount = pRefPic->uiRefCount[LIST_0];
+  for (int32_t iIdx = 0; iIdx < kiRefCount; ++iIdx) {
+    PPicture pCandidate = pRefPic->pRefList[LIST_0][iIdx];
+    if (pCandidate->iFrameNum != iFrameNum || pCandidate->bIsLongRef) {
+      continue;
+    }
+    return AddLongTermToList (pRefPic, pCandidate, iLongTermFrameIdx, uiLongTermPicNum);
+  }
+
+  return ERR_NONE;
+}
+
+#ifdef LONG_TERM_REF
+/**
+ * Looks up the long-term frame index of the long-term reference that has the
+ * given frame_num.
+ *
+ * \return iLongTermFrameIdx of the first matching picture, or -1 when the
+ *         long-term list holds no picture with that frame_num
+ */
+int32_t GetLTRFrameIndex (PRefPic pRefPic, int32_t iAncLTRFrameNum) {
+  PPicture* ppLongList = pRefPic->pLongRefList[LIST_0];
+  const int kiLongCount = pRefPic->uiLongRefCount[0];
+
+  int iIdx = 0;
+  while (iIdx < kiLongCount) {
+    PPicture pCandidate = ppLongList[iIdx];
+    if (pCandidate->iFrameNum == iAncLTRFrameNum) {
+      return (pCandidate->iLongTermFrameIdx);
+    }
+    ++iIdx;
+  }
+  return -1;
+}
+#endif
+
+/**
+ * Error-concealment helper that tries to leave at least one free reference
+ * slot in the DPB.
+ *
+ * Nothing is done while fewer than iNumRefFrames references are held.
+ * Otherwise a short-term reference is dropped through SlidingWindow() when
+ * one exists; when all references are long-term, long-term pictures are
+ * removed in increasing order of long-term frame index (skipping, with
+ * LONG_TERM_REF, the index returned by GetLTRFrameIndex() for
+ * iFrameNumOfAuMarkedLtr) until a slot is free or iMaxLongTermFrameIdx is passed.
+ *
+ * \return ERR_NONE when a slot is free afterwards;
+ *         ERR_INFO_REF_COUNT_OVERFLOW when the lists are still full
+ */
+static int32_t RemainOneBufferInDpbForEC (PWelsDecoderContext pCtx) {
+  PRefPic pRefPic = &pCtx->sRefPic;
+  if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] < pCtx->pSps->iNumRefFrames) {
+    return ERR_NONE;
+  }
+
+  int32_t iRet = ERR_NONE;
+  if (pRefPic->uiShortRefCount[0] > 0) {
+    iRet = SlidingWindow (pCtx);
+  } else { //all LTR, remove the smallest long_term_frame_idx
+    const int32_t kiMaxLongTermFrameIdx = pRefPic->iMaxLongTermFrameIdx;
+#ifdef LONG_TERM_REF
+    const int32_t kiCurrLTRFrameIdx = GetLTRFrameIndex (pRefPic, pCtx->iFrameNumOfAuMarkedLtr);
+#endif
+    for (int32_t iFrameIdx = 0;
+         (pRefPic->uiLongRefCount[0] >= pCtx->pSps->iNumRefFrames) && (iFrameIdx <= kiMaxLongTermFrameIdx);
+         ++iFrameIdx) {
+#ifdef LONG_TERM_REF
+      if (iFrameIdx == kiCurrLTRFrameIdx) {
+        continue; // this index is never removed here
+      }
+#endif
+      WelsDelLongFromListSetUnref (pRefPic, iFrameIdx);
+    }
+  }
+
+  //fail to remain one empty buffer in DPB
+  if (pRefPic->uiShortRefCount[0] + pRefPic->uiLongRefCount[0] >= pCtx->pSps->iNumRefFrames) {
+    WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "RemainOneBufferInDpbForEC(): empty one DPB failed for EC!");
+    iRet = ERR_INFO_REF_COUNT_OVERFLOW;
+  }
+
+  return iRet;
+}
+
+} // namespace WelsDec
--- a/codec/decoder/core/src/mv_pred.cpp
+++ b/codec/decoder/core/src/mv_pred.cpp
@@ -41,8 +41,120 @@
 #include "mv_pred.h"
 #include "ls_defines.h"
 #include "mb_cache.h"
+#include "parse_mb_syn_cabac.h"
 
 namespace WelsDec {
+
+// Fills a small rectangle with a constant element value.
+//   vp     - address of the top-left element of the rectangle
+//   w      - rectangle width in elements (w * size gives the width in bytes)
+//   h      - rectangle height in rows
+//   stride - distance between consecutive rows, in bytes
+//   val    - element value; used verbatim when size == 4, otherwise its byte
+//            pattern is replicated across each row
+//   size   - element size in bytes
+// Only these (byte width, height) shapes are handled: 1x4, 2x2, 2x4, 4x2, 4x4,
+// 8x1, 8x2, 8x4, 16x2, 16x3 and 16x4. Any other shape leaves memory untouched.
+static inline  void SetRectBlock (void* vp, int32_t w, const int32_t h, int32_t stride, const uint32_t val,
+                                  const int32_t size) {
+  uint8_t* p = (uint8_t*)vp;
+  w *= size;
+
+  const int32_t bSupported = (w == 1 && h == 4)
+                             || ((w == 2 || w == 4) && (h == 2 || h == 4))
+                             || (w == 8 && (h == 1 || h == 2 || h == 4))
+                             || (w == 16 && (h >= 2 && h <= 4));
+  if (!bSupported)
+    return;
+
+  // A negative int8_t (e.g. a "not in list" reference index) arrives here
+  // sign-extended to 32 bits. Replicating that unmasked corrupts every byte
+  // after the first one (0xFFFFFFFF * 0x0101 -> ..FEFF), so keep only the low
+  // byte for single-byte elements before building the fill pattern.
+  const uint32_t uiElem = (size == 1) ? (val & 0xFFU) : val;
+
+  if (w == 1) {
+    for (int32_t iRow = 0; iRow < h; ++iRow)
+      p[iRow * stride] = (uint8_t)uiElem;
+  } else if (w == 2) {
+    const uint16_t uiFill16 = size == 4 ? (uint16_t)uiElem : (uint16_t) (uiElem * 0x0101U);
+    // memcpy instead of a uint16_t* store: no alignment or aliasing assumptions.
+    for (int32_t iRow = 0; iRow < h; ++iRow)
+      memcpy (p + iRow * stride, &uiFill16, sizeof (uiFill16));
+  } else { // 4, 8 or 16 bytes per row, written as 32-bit words
+    const uint32_t uiFill32 = size == 4 ? uiElem : (uint32_t) (uiElem * 0x01010101UL);
+    for (int32_t iRow = 0; iRow < h; ++iRow) {
+      for (int32_t iCol = 0; iCol < w; iCol += 4)
+        memcpy (p + iRow * stride + iCol, &uiFill32, sizeof (uiFill32));
+    }
+  }
+}
+// Copies four rows of w elements from vsrc to vdst.
+//   vdst, vsrc             - first byte of the destination / source rectangle
+//   stride_dst, stride_src - distance between consecutive rows, in bytes
+//   w                      - row width in elements
+//   size                   - element size in bytes
+// Only row widths of 1, 2, 4 or 16 bytes (w * size) are handled; any other
+// width copies nothing.
+void CopyRectBlock4Cols (void* vdst, void* vsrc, const int32_t stride_dst, const int32_t stride_src, int32_t w,
+                         const int32_t size) {
+  uint8_t* dst = (uint8_t*)vdst;
+  uint8_t* src = (uint8_t*)vsrc;
+  w *= size;
+  if (w != 1 && w != 2 && w != 4 && w != 16)
+    return;
+  // memcpy rather than uint16_t*/uint32_t* casts: identical bytes are moved
+  // without relying on the buffers' alignment or effective type.
+  for (int32_t iRow = 0; iRow < 4; ++iRow)
+    memcpy (&dst[stride_dst * iRow], &src[stride_src * iRow], (size_t)w);
+}
 void PredPSkipMvFromNeighbor (PDqLayer pCurLayer, int16_t iMvp[2]) {
   bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
 
@@ -192,19 +304,464 @@
   }
 }
 
+// Collects the co-located macroblock data needed for B direct prediction.
+// The co-located picture is the first entry of reference list 1; the MB at the
+// current MB index in that picture is inspected and its intra flag, motion
+// vectors and reference indices are copied into pCurLayer->iColocIntra,
+// iColocMv and iColocRefIndex.
+//   mbType    - out: current MB type with partition and L0/L1 flags OR-ed in
+//   subMbType - out: sub-MB type to use for the direct partitions
+// Always returns 1.
+int32_t GetColocatedMb (PWelsDecoderContext pCtx, MbType& mbType, SubMbType& subMbType) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
 
+  uint32_t is8x8 = IS_Inter_8x8 (pCurLayer->pMbType[iMbXy]);
+  mbType = pCurLayer->pMbType[iMbXy];
+
+  // NOTE(review): colocPic is dereferenced without a NULL check - confirm list 1
+  // is guaranteed to be populated before this is called.
+  PPicture colocPic = pCtx->sRefPic.pRefList[LIST_1][0];
+
+  MbType coloc_mbType = colocPic->pMbType[iMbXy];
+
+  // Choose the partitioning: 4x4 sub-blocks when the co-located MB is 8x8 and
+  // direct_8x8_inference is off, a single 16x16 when the current MB is not 8x8
+  // and the co-located MB is 16x16 or intra, 8x8 otherwise.
+  if (IS_Inter_8x8 (coloc_mbType) && !pCtx->pSps->bDirect8x8InferenceFlag) {
+    subMbType = SUB_MB_TYPE_4x4 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
+    mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1;
+  } else if (!is8x8 && (IS_INTER_16x16 (coloc_mbType) || IS_INTRA (coloc_mbType)/* || IS_SKIP(coloc_mbType)*/)) {
+    subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
+    mbType |= MB_TYPE_16x16 | MB_TYPE_L0 | MB_TYPE_L1;
+  } else {
+    subMbType = SUB_MB_TYPE_8x8 | MB_TYPE_P0L0 | MB_TYPE_P0L1 | MB_TYPE_DIRECT;
+    mbType |= MB_TYPE_8x8 | MB_TYPE_L0 | MB_TYPE_L1;
+  }
+
+  // Intra co-located MB: flag all 16 entries and return without touching the
+  // co-located MV / reference index arrays.
+  if (IS_INTRA (coloc_mbType)) {
+    SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 1, sizeof (int8_t));
+    return 1;
+  }
+  SetRectBlock (pCurLayer->iColocIntra, 4, 4, 4 * sizeof (int8_t), 0, sizeof (int8_t));
+
+  if (IS_INTER_16x16 (mbType)) {
+    // 16x16: only entry 0 of the co-located arrays is filled. List 1 falls back
+    // to a zero MV and REF_NOT_IN_LIST when the co-located MB has no L1 part.
+    int16_t iMVZero[2] = { 0 };
+    int16_t* pMv = IS_TYPE_L1 (coloc_mbType) ? colocPic->pMv[LIST_1][iMbXy][0] : iMVZero;
+    ST32 (pCurLayer->iColocMv[LIST_0][0], LD32 (colocPic->pMv[LIST_0][iMbXy][0]));
+    ST32 (pCurLayer->iColocMv[LIST_1][0], LD32 (pMv));
+    pCurLayer->iColocRefIndex[LIST_0][0] = colocPic->pRefIndex[LIST_0][iMbXy][0];
+    pCurLayer->iColocRefIndex[LIST_1][0] = IS_TYPE_L1 (coloc_mbType) ? colocPic->pRefIndex[LIST_1][iMbXy][0] :
+                                           REF_NOT_IN_LIST;
+  } else {
+    if (!pCtx->pSps->bDirect8x8InferenceFlag) {
+      // No 8x8 inference: copy all 16 MVs and reference indices as they are.
+      CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_0], colocPic->pMv[LIST_0][iMbXy], 16, 16, 4, 4);
+      CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_0], colocPic->pRefIndex[LIST_0][iMbXy], 4, 4, 4, 1);
+      if (IS_TYPE_L1 (coloc_mbType)) {
+        CopyRectBlock4Cols (pCurLayer->iColocMv[LIST_1], colocPic->pMv[LIST_1][iMbXy], 16, 16, 4, 4);
+        CopyRectBlock4Cols (pCurLayer->iColocRefIndex[LIST_1], colocPic->pRefIndex[LIST_1][iMbXy], 4, 4, 4, 1);
+      } else { // only forward prediction
+        SetRectBlock (pCurLayer->iColocRefIndex[LIST_1], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
+      }
+    } else {
+      // 8x8 inference: each 8x8 quadrant is filled from one source block of the
+      // co-located MB (source indices 0, 3, 12 and 15).
+      for (int32_t listIdx = 0; listIdx < 1 + !! (coloc_mbType & MB_TYPE_L1); listIdx++) {
+        SetRectBlock (pCurLayer->iColocMv[listIdx][0], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][0]), 4);
+        SetRectBlock (pCurLayer->iColocMv[listIdx][2], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][3]), 4);
+        SetRectBlock (pCurLayer->iColocMv[listIdx][8], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][12]), 4);
+        SetRectBlock (pCurLayer->iColocMv[listIdx][10], 2, 2, 16, LD32 (colocPic->pMv[listIdx][iMbXy][15]), 4);
+
+        // NOTE(review): these reference indices go into a uint32_t parameter
+        // without the (uint8_t) cast used for REF_NOT_IN_LIST elsewhere in this
+        // function; if the index type is signed, negative values are
+        // sign-extended before byte replication - confirm the stored bytes.
+        SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][0], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][0], 1);
+        SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][2], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][3], 1);
+        SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][8], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][12], 1);
+        SetRectBlock (&pCurLayer->iColocRefIndex[listIdx][10], 2, 2, 4, colocPic->pRefIndex[listIdx][iMbXy][15], 1);
+      }
+      if (! (coloc_mbType & MB_TYPE_L1)) // only forward prediction
+        SetRectBlock (&pCurLayer->iColocRefIndex[1][0], 4, 4, 4, (uint8_t)REF_NOT_IN_LIST, 1);
+    }
+  }
+  return 1;
+}
+
+// Spatial direct prediction for the current B macroblock.
+// For each reference list it derives a reference index and motion vector from
+// the left, top and diagonal (top-right, or top-left when top-right is
+// unavailable) neighbouring MBs, then zeroes vectors according to the
+// co-located block data fetched by GetColocatedMb(). The results are written
+// into the layer's MB type, motion, MVD and reference index storage.
+//   iMvp - out: predicted motion vector per list
+//   ref  - out: chosen reference index per list
+// Returns the sub-MB type reported by GetColocatedMb(), with the L0 or L1 flag
+// cleared when that list ended up unused.
+SubMbType PredMvBDirectSpatial (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]) {
+
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0;
+
+  MbType mbType;
+  SubMbType subMbType;
+  GetColocatedMb (pCtx, mbType, subMbType);
+
+  bool bTopAvail, bLeftTopAvail, bRightTopAvail, bLeftAvail;
+  int32_t iLeftTopType, iRightTopType, iTopType, iLeftType;
+  int32_t iCurSliceIdc, iTopSliceIdc, iLeftTopSliceIdc, iRightTopSliceIdc, iLeftSliceIdc;
+  int32_t iCurX, iCurY, iCurXy, iLeftXy = 0, iTopXy = 0, iLeftTopXy = 0, iRightTopXy = 0;
+
+  int8_t iLeftRef[LIST_A];
+  int8_t iTopRef[LIST_A];
+  int8_t iRightTopRef[LIST_A];
+  int8_t iLeftTopRef[LIST_A];
+  int8_t iDiagonalRef[LIST_A];
+  int16_t iMvA[LIST_A][2], iMvB[LIST_A][2], iMvC[LIST_A][2], iMvD[LIST_A][2];
+
+  iCurXy = pCurLayer->iMbXyIndex;
+
+  iCurX = pCurLayer->iMbX;
+  iCurY = pCurLayer->iMbY;
+  iCurSliceIdc = pCurLayer->pSliceIdc[iCurXy];
+
+  // A neighbour counts as available only if it lies inside the picture and
+  // carries the same slice id as the current MB.
+  if (iCurX != 0) {
+    iLeftXy = iCurXy - 1;
+    iLeftSliceIdc = pCurLayer->pSliceIdc[iLeftXy];
+    bLeftAvail = (iLeftSliceIdc == iCurSliceIdc);
+  } else {
+    bLeftAvail = 0;
+    bLeftTopAvail = 0;
+  }
+
+  if (iCurY != 0) {
+    iTopXy = iCurXy - pCurLayer->iMbWidth;
+    iTopSliceIdc = pCurLayer->pSliceIdc[iTopXy];
+    bTopAvail = (iTopSliceIdc == iCurSliceIdc);
+    if (iCurX != 0) {
+      iLeftTopXy = iTopXy - 1;
+      iLeftTopSliceIdc = pCurLayer->pSliceIdc[iLeftTopXy];
+      bLeftTopAvail = (iLeftTopSliceIdc == iCurSliceIdc);
+    } else {
+      bLeftTopAvail = 0;
+    }
+    if (iCurX != (pCurLayer->iMbWidth - 1)) {
+      iRightTopXy = iTopXy + 1;
+      iRightTopSliceIdc = pCurLayer->pSliceIdc[iRightTopXy];
+      bRightTopAvail = (iRightTopSliceIdc == iCurSliceIdc);
+    } else {
+      bRightTopAvail = 0;
+    }
+  } else {
+    bTopAvail = 0;
+    bLeftTopAvail = 0;
+    bRightTopAvail = 0;
+  }
+
+  // MB type of each neighbour, 0 when the neighbour is not available.
+  iLeftType = ((iCurX != 0 && bLeftAvail) ? pCurLayer->pMbType[iLeftXy] : 0);
+  iTopType = ((iCurY != 0 && bTopAvail) ? pCurLayer->pMbType[iTopXy] : 0);
+  iLeftTopType = ((iCurX != 0 && iCurY != 0 && bLeftTopAvail)
+                  ? pCurLayer->pMbType[iLeftTopXy] : 0);
+  iRightTopType = ((iCurX != pCurLayer->iMbWidth - 1 && iCurY != 0 && bRightTopAvail)
+                   ? pCurLayer->pMbType[iRightTopXy] : 0);
+
+  /*get neb mv&iRefIdxArray*/
+  for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+
+    // For each neighbour: take MV and reference index from the block that
+    // touches the current MB (index 3 left, 12 top / top-right, 15 top-left).
+    // Non-inter or unavailable neighbours contribute a zero MV and
+    // REF_NOT_IN_LIST / REF_NOT_AVAIL respectively.
+    /*left*/
+    if (bLeftAvail && IS_INTER (iLeftType)) {
+      ST32 (iMvA[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
+      iLeftRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
+    } else {
+      ST32 (iMvA[listIdx], 0);
+      if (0 == bLeftAvail) { //not available
+        iLeftRef[listIdx] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iLeftRef[listIdx] = REF_NOT_IN_LIST;
+      }
+    }
+
+    /*top*/
+    if (bTopAvail && IS_INTER (iTopType)) {
+      ST32 (iMvB[listIdx], LD32 (pCurLayer->pMv[listIdx][iTopXy][12]));
+      iTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iTopXy][12];
+    } else {
+      ST32 (iMvB[listIdx], 0);
+      if (0 == bTopAvail) { //not available
+        iTopRef[listIdx] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iTopRef[listIdx] = REF_NOT_IN_LIST;
+      }
+    }
+
+    /*right_top*/
+    if (bRightTopAvail && IS_INTER (iRightTopType)) {
+      ST32 (iMvC[listIdx], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
+      iRightTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
+    } else {
+      ST32 (iMvC[listIdx], 0);
+      if (0 == bRightTopAvail) { //not available
+        iRightTopRef[listIdx] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRightTopRef[listIdx] = REF_NOT_IN_LIST;
+      }
+    }
+    /*left_top*/
+    if (bLeftTopAvail && IS_INTER (iLeftTopType)) {
+      ST32 (iMvD[listIdx], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
+      iLeftTopRef[listIdx] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
+    } else {
+      ST32 (iMvD[listIdx], 0);
+      if (0 == bLeftTopAvail) { //not available
+        iLeftTopRef[listIdx] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iLeftTopRef[listIdx] = REF_NOT_IN_LIST;
+      }
+    }
+
+    // Diagonal candidate: top-right, replaced by top-left (ref and MV) when
+    // top-right is not available.
+    iDiagonalRef[listIdx] = iRightTopRef[listIdx];
+    if (REF_NOT_AVAIL == iDiagonalRef[listIdx]) {
+      iDiagonalRef[listIdx] = iLeftTopRef[listIdx];
+      * (int32_t*)iMvC[listIdx] = * (int32_t*)iMvD[listIdx];
+    }
+
+    // WELS_MIN_POSITIVE presumably yields the smallest non-negative of its two
+    // arguments - TODO confirm against the macro definition.
+    int8_t ref_temp = WELS_MIN_POSITIVE (iTopRef[listIdx], iDiagonalRef[listIdx]);
+    ref[listIdx] = WELS_MIN_POSITIVE (iLeftRef[listIdx], ref_temp);
+    if (ref[listIdx] >= 0) {
+
+      // Exactly one neighbour with the chosen ref: copy its MV. Otherwise use
+      // the component-wise median of the three candidates.
+      uint32_t match_count = (iLeftRef[listIdx] == ref[listIdx]) + (iTopRef[listIdx] == ref[listIdx]) +
+                             (iDiagonalRef[listIdx] == ref[listIdx]);
+      if (match_count == 1) {
+        if (iLeftRef[listIdx] == ref[listIdx]) {
+          ST32 (iMvp[listIdx], LD32 (iMvA[listIdx]));
+        } else if (iTopRef[listIdx] == ref[listIdx]) {
+          ST32 (iMvp[listIdx], LD32 (iMvB[listIdx]));
+        } else {
+          ST32 (iMvp[listIdx], LD32 (iMvC[listIdx]));
+        }
+      } else {
+        iMvp[listIdx][0] = WelsMedian (iMvA[listIdx][0], iMvB[listIdx][0], iMvC[listIdx][0]);
+        iMvp[listIdx][1] = WelsMedian (iMvA[listIdx][1], iMvB[listIdx][1], iMvC[listIdx][1]);
+      }
+    } else {
+      iMvp[listIdx][0] = 0;
+      iMvp[listIdx][1] = 0;
+      ref[listIdx] = REF_NOT_IN_LIST;
+    }
+  }
+  // Neither list has a usable reference: use index 0 on both lists. Otherwise
+  // drop the list flag of whichever list has no reference.
+  if (ref[LIST_0] <= REF_NOT_IN_LIST && ref[LIST_1] <= REF_NOT_IN_LIST) {
+    ref[LIST_0] = ref[LIST_1] = 0;
+  } else if (ref[LIST_1] < 0) {
+    mbType &= ~MB_TYPE_L1;
+    subMbType &= ~MB_TYPE_L1;
+  } else if (ref[LIST_0] < 0) {
+    mbType &= ~MB_TYPE_L0;
+    subMbType &= ~MB_TYPE_L0;
+  }
+  pCurLayer->pMbType[iMbXy] = mbType;
+
+  int16_t pMvd[4] = { 0 };
+
+  bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
+
+  if (IS_INTER_16x16 (mbType)) {
+    // 16x16: when either predicted MV is non-zero, the co-located block is
+    // inter, list 1 entry 0 is not long-term, and the co-located MV used
+    // (list 0 with ref 0, or list 1 with ref 0 when list 0 is unused) has both
+    // components in [-1, 1], the MVs of lists whose ref is <= 0 are zeroed.
+    if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) {
+      if (0 == pCurLayer->iColocIntra[0] && !bIsLongRef
+          && ((pCurLayer->iColocRefIndex[LIST_0][0] == 0 && (unsigned) (pCurLayer->iColocMv[LIST_0][0][0] + 1) <= 2
+               && (unsigned) (pCurLayer->iColocMv[LIST_0][0][1] + 1) <= 2)
+              || (pCurLayer->iColocRefIndex[LIST_0][0] < 0 && pCurLayer->iColocRefIndex[LIST_1][0] == 0
+                  && (unsigned) (pCurLayer->iColocMv[LIST_1][0][0] + 1) <= 2
+                  && (unsigned) (pCurLayer->iColocMv[LIST_1][0][1] + 1) <= 2))) {
+        if (0 >= ref[0])  * (uint32_t*)iMvp[LIST_0] = 0;
+        if (0 >= ref[1])  * (uint32_t*)iMvp[LIST_1] = 0;
+      }
+    }
+    UpdateP16x16DirectCabac (pCurLayer);
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      UpdateP16x16MotionInfo (pCurLayer, listIdx, ref[listIdx], iMvp[listIdx]);
+      UpdateP16x16MvdCabac (pCurLayer, pMvd, listIdx);
+    }
+  } else {
+    // Sub-block path, only taken for skip/direct MBs: one pass per 8x8 quadrant.
+    if (bSkipOrDirect) {
+      int8_t pSubPartCount[4], pPartW[4];
+      for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
+        int16_t iIdx8 = i << 2;
+        pCurLayer->pSubMbType[iMbXy][i] = subMbType;
+        // NOTE(review): pRefIndex is a local scratch array that is not read in
+        // this function after these calls - confirm the helpers' other effects.
+        int8_t pRefIndex[LIST_A][30];
+        UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
+        UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
+        UpdateP8x8DirectCabac (pCurLayer, iIdx8);
+
+        pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
+        pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
+
+        if (IS_SUB_4x4 (subMbType)) {
+          pSubPartCount[i] = 4;
+          pPartW[i] = 1;
+        }
+
+        int8_t iPartCount = pSubPartCount[i];
+        int16_t iPartIdx, iBlockW = pPartW[i];
+
+        for (int32_t j = 0; j < iPartCount; j++) {
+          iPartIdx = iIdx8 + j * iBlockW;
+          uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+          uint8_t iColocIdx = g_kuiScan4[iPartIdx];
+          //uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+
+          // Store the predicted MV for both lists and clear the MVD: two
+          // 64-bit stores (rows iScan4Idx and iScan4Idx + 4) for an 8x8
+          // sub-block, one 32-bit store for a 4x4 sub-block.
+          int16_t pMV[4] = { 0 };
+          if (IS_SUB_8x8 (subMbType)) {
+            * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_0];
+            ST32 ((pMV + 2), LD32 (pMV));
+            ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+            ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+            ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+            ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+            * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_1];
+            ST32 ((pMV + 2), LD32 (pMV));
+            ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+            ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+            ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+            ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+          } else { //SUB_4x4
+            * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_0];
+            ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
+            ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+            * (uint32_t*)pMV = * (uint32_t*)iMvp[LIST_1];
+            ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
+            ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+          }
+          // Same zeroing rule as the 16x16 branch, evaluated per sub-block
+          // against the co-located entry at iColocIdx; here only lists whose
+          // ref is exactly 0 are zeroed.
+          if ((* (int32_t*)iMvp[LIST_0] | * (int32_t*)iMvp[LIST_1])) {
+            uint32_t uiColZeroFlag = (0 == pCurLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
+                                     (pCurLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
+                                         && pCurLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
+            const int16_t (*mvColoc)[2] = 0 == pCurLayer->iColocRefIndex[LIST_0][iColocIdx] ? pCurLayer->iColocMv[LIST_0] :
+                                          pCurLayer->iColocMv[LIST_1];
+            const int16_t* mv = mvColoc[iColocIdx];
+            if (IS_SUB_8x8 (subMbType)) {
+              if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+                if (ref[LIST_0] == 0) {
+                  ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+                  ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
+                  ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+                  ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+                }
+
+                if (ref[LIST_1] == 0) {
+                  ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+                  ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
+                  ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+                  ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+                }
+              }
+            } else {
+              if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+                if (ref[LIST_0] == 0) {
+                  ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+                  ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+                }
+                if (ref[LIST_1] == 0) {
+                  ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+                  ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+  return subMbType;
+}
+
+// Temporal direct prediction for the current B macroblock.
+// The co-located block data fetched by GetColocatedMb() supplies a reference
+// index and a motion vector mvCol. The list-0 vector is mvCol scaled by
+// pSlice->iMvScale ((scale * mvCol + 128) >> 8) and the list-1 vector is
+// mvL0 - mvCol. Results are written to the layer's motion, MVD and reference
+// index storage.
+//   iMvp - out: motion vectors per list (of the last block processed)
+//   ref  - out: reference indices per list (of the last block processed)
+void PredBDirectTemporal (PWelsDecoderContext pCtx, int16_t iMvp[LIST_A][2], int8_t ref[LIST_A]) {
+  PDqLayer pCurLayer = pCtx->pCurDqLayer;
+  int32_t iMbXy = pCurLayer->iMbXyIndex;
+  bool bSkipOrDirect = (IS_SKIP (pCurLayer->pMbType[iMbXy]) | IS_DIRECT (pCurLayer->pMbType[iMbXy])) > 0;
+  MbType mbType;
+  SubMbType subMbType;
+  GetColocatedMb (pCtx, mbType, subMbType);
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  if (IS_INTER_16x16 (mbType)) {
+    ref[LIST_0] = 0;
+    ref[LIST_1] = 0;
+    UpdateP16x16RefIdx (pCurLayer, LIST_1, ref[LIST_1]);
+    ST64 (iMvp,  0);
+    if (pCurLayer->iColocIntra[0]) {
+      // Intra co-located block: zero vectors and reference index 0 on both lists.
+      UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]);
+      UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]);
+      UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]);
+    } else {
+      ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_0][0];
+      const int16_t (*mvColoc)[2] = 0 == ref[LIST_0] ? pCurLayer->iColocMv[LIST_0] : pCurLayer->iColocMv[LIST_1];
+      const int16_t* mv = mvColoc[0];
+      UpdateP16x16RefIdx (pCurLayer, LIST_0, ref[LIST_0]);
+
+      iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
+      iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
+      UpdateP16x16MotionOnly (pCurLayer, LIST_0, iMvp[LIST_0]);
+      iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0];
+      iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
+      UpdateP16x16MotionOnly (pCurLayer, LIST_1, iMvp[LIST_1]);
+    }
+  } else {
+    // Sub-block path, only taken for skip/direct MBs: one pass per 8x8 quadrant.
+    if (bSkipOrDirect) {
+      int8_t pSubPartCount[4], pPartW[4];
+      for (int32_t i = 0; i < 4; i++) {
+        int16_t iIdx8 = i << 2;
+        pCurLayer->pSubMbType[iMbXy][i] = subMbType;
+
+        // List 1 always uses reference 0; list 0 takes the co-located list-0
+        // index, or the list-1 index when list 0 is unused there.
+        ref[LIST_1] = 0;
+        if (pCurLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+          ref[LIST_0] = 0;
+        } else {
+          if (pCurLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
+            ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_0][iIdx8];
+          } else {
+            ref[LIST_0] = pCurLayer->iColocRefIndex[LIST_1][iIdx8];
+          }
+        }
+        int8_t pRefIndex[LIST_A][30];
+        UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_0], LIST_0);
+        UpdateP8x8RefIdxCabac (pCurLayer, pRefIndex, iIdx8, ref[LIST_1], LIST_1);
+        UpdateP8x8DirectCabac (pCurLayer, iIdx8);
+
+        pSubPartCount[i] = g_ksInterBSubMbTypeInfo[0].iPartCount;
+        pPartW[i] = g_ksInterBSubMbTypeInfo[0].iPartWidth;
+
+        if (IS_SUB_4x4 (subMbType)) {
+          pSubPartCount[i] = 4;
+          pPartW[i] = 1;
+        }
+
+        int8_t iPartCount = pSubPartCount[i];
+        int16_t iPartIdx, iBlockW = pPartW[i];
+        for (int32_t j = 0; j < iPartCount; j++) {
+          iPartIdx = iIdx8 + j * iBlockW;
+          uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+          uint8_t iColocIdx = g_kuiScan4[iPartIdx];
+
+          // NOTE(review): the co-located MV is always read from list 0 here,
+          // even when ref[LIST_0] was taken from the list-1 index above -
+          // confirm this is intended.
+          int16_t (*mvColoc)[2] = pCurLayer->iColocMv[LIST_0];
+          int16_t* mv = mvColoc[iColocIdx];
+
+          int16_t pMV[4] = { 0 };
+          if (IS_SUB_8x8 (subMbType)) {
+            iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
+            iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
+            ST32 (pMV, LD32 (iMvp[LIST_0]));
+            ST32 ((pMV + 2), LD32 (iMvp[LIST_0]));
+            ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+            ST64 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+            ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+            ST64 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+            // mvL1 = mvL0 - mvCol, component by component, as in the 16x16
+            // branch. Plain assignment: each block's vector is independent of
+            // the previous block and of whatever the caller left in iMvp.
+            iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0];
+            iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
+            ST32 (pMV, LD32 (iMvp[LIST_1]));
+            ST32 ((pMV + 2), LD32 (iMvp[LIST_1]));
+            ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+            ST64 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+            ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+            ST64 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+          } else { //SUB_4x4
+            iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[0] + 128) >> 8;
+            iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][ref[LIST_0]] * mv[1] + 128) >> 8;
+            ST32 (pCurLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (iMvp[LIST_0]));
+            ST32 (pCurLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+            // mvL1 = mvL0 - mvCol (see the 8x8 case above).
+            iMvp[LIST_1][0] = iMvp[LIST_0][0] - mv[0];
+            iMvp[LIST_1][1] = iMvp[LIST_0][1] - mv[1];
+            ST32 (pCurLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (iMvp[LIST_1]));
+            ST32 (pCurLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+          }
+        }
+      }
+    }
+  }
+}
+
 //basic iMVs prediction unit for iMVs partition width (4, 2, 1)
 void PredMv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
-             int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]) {
+             int32_t listIdx, int32_t iPartIdx, int32_t iPartWidth, int8_t iRef, int16_t iMVP[2]) {
   const uint8_t kuiLeftIdx      = g_kuiCache30ScanIdx[iPartIdx] - 1;
   const uint8_t kuiTopIdx       = g_kuiCache30ScanIdx[iPartIdx] - 6;
   const uint8_t kuiRightTopIdx  = kuiTopIdx + iPartWidth;
   const uint8_t kuiLeftTopIdx   = kuiTopIdx - 1;
 
-  const int8_t kiLeftRef      = iRefIndex[0][kuiLeftIdx];
-  const int8_t kiTopRef       = iRefIndex[0][ kuiTopIdx];
-  const int8_t kiRightTopRef  = iRefIndex[0][kuiRightTopIdx];
-  const int8_t kiLeftTopRef   = iRefIndex[0][ kuiLeftTopIdx];
+  const int8_t kiLeftRef      = iRefIndex[listIdx][kuiLeftIdx];
+  const int8_t kiTopRef       = iRefIndex[listIdx][ kuiTopIdx];
+  const int8_t kiRightTopRef  = iRefIndex[listIdx][kuiRightTopIdx];
+  const int8_t kiLeftTopRef   = iRefIndex[listIdx][ kuiLeftTopIdx];
   int8_t iDiagonalRef  = kiRightTopRef;
 
   int8_t iMatchRef = 0;
@@ -212,13 +769,13 @@
 
   int16_t iAMV[2], iBMV[2], iCMV[2];
 
-  ST32 (iAMV, LD32 (iMotionVector[0][     kuiLeftIdx]));
-  ST32 (iBMV, LD32 (iMotionVector[0][      kuiTopIdx]));
-  ST32 (iCMV, LD32 (iMotionVector[0][kuiRightTopIdx]));
+  ST32 (iAMV, LD32 (iMotionVector[listIdx][     kuiLeftIdx]));
+  ST32 (iBMV, LD32 (iMotionVector[listIdx][      kuiTopIdx]));
+  ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiRightTopIdx]));
 
   if (REF_NOT_AVAIL == iDiagonalRef) {
     iDiagonalRef = kiLeftTopRef;
-    ST32 (iCMV, LD32 (iMotionVector[0][kuiLeftTopIdx]));
+    ST32 (iCMV, LD32 (iMotionVector[listIdx][kuiLeftTopIdx]));
   }
 
   iMatchRef = (iRef == kiLeftRef) + (iRef == kiTopRef) + (iRef == iDiagonalRef);
@@ -242,51 +799,51 @@
   }
 }
+// Motion vector prediction for one half of an 8x16 partitioned MB in list
+// listIdx. Partition 0 takes the cached MV at index 6 when that entry's
+// reference index equals iRef. The other partition takes the diagonal entry
+// (cache index 5, or 2 when 5 is REF_NOT_AVAIL) when its reference index
+// equals iRef. In every other case the result comes from PredMv() with a
+// partition width of 2.
 void PredInter8x16Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
-                      int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
+                      int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
   if (0 == iPartIdx) {
-    const int8_t kiLeftRef = iRefIndex[0][6];
+    const int8_t kiLeftRef = iRefIndex[listIdx][6];
     if (iRef == kiLeftRef) {
-      ST32 (iMVP, LD32 (&iMotionVector[0][6][0]));
+      ST32 (iMVP, LD32 (&iMotionVector[listIdx][6][0]));
       return;
     }
   } else { // 1 == iPartIdx
-    int8_t iDiagonalRef = iRefIndex[0][5]; //top-right
+    int8_t iDiagonalRef = iRefIndex[listIdx][5]; //top-right
     int8_t index = 5;
     if (REF_NOT_AVAIL == iDiagonalRef) {
-      iDiagonalRef = iRefIndex[0][2]; //top-left for 8*8 block(index 1)
+      iDiagonalRef = iRefIndex[listIdx][2]; //top-left for 8*8 block(index 1)
       index = 2;
     }
     if (iRef == iDiagonalRef) {
-      ST32 (iMVP, LD32 (&iMotionVector[0][index][0]));
+      ST32 (iMVP, LD32 (&iMotionVector[listIdx][index][0]));
       return;
     }
   }
 
-  PredMv (iMotionVector, iRefIndex, iPartIdx, 2, iRef, iMVP);
+  PredMv (iMotionVector, iRefIndex, listIdx, iPartIdx, 2, iRef, iMVP);
 }
+// Motion vector prediction for one half of a 16x8 partitioned MB in list
+// listIdx. Partition 0 takes the cached MV at index 1 when that entry's
+// reference index equals iRef; the other partition does the same with cache
+// index 18. In every other case the result comes from PredMv() with a
+// partition width of 4.
 void PredInter16x8Mv (int16_t iMotionVector[LIST_A][30][MV_A], int8_t iRefIndex[LIST_A][30],
-                      int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
+                      int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVP[2]) {
   if (0 == iPartIdx) {
-    const int8_t kiTopRef = iRefIndex[0][1];
+    const int8_t kiTopRef = iRefIndex[listIdx][1];
     if (iRef == kiTopRef) {
-      ST32 (iMVP, LD32 (&iMotionVector[0][1][0]));
+      ST32 (iMVP, LD32 (&iMotionVector[listIdx][1][0]));
       return;
     }
   } else { // 8 == iPartIdx
-    const int8_t kiLeftRef = iRefIndex[0][18];
+    const int8_t kiLeftRef = iRefIndex[listIdx][18];
     if (iRef == kiLeftRef) {
-      ST32 (iMVP, LD32 (&iMotionVector[0][18][0]));
+      ST32 (iMVP, LD32 (&iMotionVector[listIdx][18][0]));
       return;
     }
   }
 
-  PredMv (iMotionVector, iRefIndex, iPartIdx, 4, iRef, iMVP);
+  PredMv (iMotionVector, iRefIndex, listIdx, iPartIdx, 4, iRef, iMVP);
 }
 
 //update iMVs and iRefIndex cache for current MB, only for P_16*16 (SKIP inclusive)
 /* can be further optimized */
-void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int8_t iRef, int16_t iMVs[2]) {
-  const int16_t kiRef2 = (iRef << 8) | iRef;
+void UpdateP16x16MotionInfo (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef, int16_t iMVs[2]) {
+  const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
   const int32_t kiMV32 = LD32 (iMVs);
   int32_t i;
   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
@@ -296,22 +853,58 @@
     const uint8_t kuiScan4Idx = g_kuiScan4[i];
     const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
 
-    ST16 (&pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4Idx ], kiRef2);
-    ST16 (&pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4IdxPlus4], kiRef2);
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
 
-    ST32 (pCurDqLayer->pMv[0][iMbXy][  kuiScan4Idx ], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][1 + kuiScan4Idx ], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][  kuiScan4IdxPlus4], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][  kuiScan4Idx ], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][  kuiScan4IdxPlus4], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
   }
 }
 
+// Set the current MB's reference-index entries (pCurDqLayer->pRefIndex[listIdx][iMbXy]) to iRef; motion vectors are not touched. Meant for whole-MB (16x16, SKIP inclusive) prediction.
+/* Four passes; each stores the index pair at g_kuiScan4[i] and at g_kuiScan4[i] + 4. Can be further optimized. */
+void UpdateP16x16RefIdx (PDqLayer pCurDqLayer, int32_t listIdx, int8_t iRef) {
+  const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef; // iRef duplicated into both bytes; the uint8_t casts keep a negative iRef from sign-extending over the high byte
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+
+  for (i = 0; i < 16; i += 4) {
+    // per-MB storage in the layer only; unlike the 16x8/8x16 updaters, no neighbor cache is written here
+    const uint8_t kuiScan4Idx = g_kuiScan4[i];
+    const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
+
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx], kiRef2);
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+  }
+}
+
+// Write motion vector iMVs into the current MB's MV entries (pCurDqLayer->pMv[listIdx][iMbXy]); reference indices are not touched. Meant for whole-MB (16x16, SKIP inclusive) prediction.
+/* Four passes; each stores the vector at g_kuiScan4[i], +1, +4 and +5. Can be further optimized. */
+void UpdateP16x16MotionOnly (PDqLayer pCurDqLayer, int32_t listIdx, int16_t iMVs[2]) {
+  const int32_t kiMV32 = LD32 (iMVs); // presumably both int16_t components of iMVs read as one 32-bit value - confirm against the LD32 macro
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+
+  for (i = 0; i < 16; i += 4) {
+    // per-MB storage in the layer only; unlike the 16x8/8x16 updaters, no neighbor cache is written here
+    const uint8_t kuiScan4Idx = g_kuiScan4[i];
+    const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
+
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4Idx], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][kuiScan4IdxPlus4], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+  }
+}
+
 //update iRefIndex and iMVs of Mb, only for P16x8
 /*need further optimization, mb_cache not work */
 void UpdateP16x8MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
                             int8_t iRefIndex[LIST_A][30],
-                            int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
-  const int16_t kiRef2 = (iRef << 8) | iRef;
+                            int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
+  const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
   const int32_t kiMV32 = LD32 (iMVs);
   int32_t i;
   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
@@ -322,26 +915,26 @@
     const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
 
     //mb
-    ST16 (&pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4Idx ], kiRef2);
-    ST16 (&pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4IdxPlus4], kiRef2);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][  kuiScan4Idx ], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][1 + kuiScan4Idx ], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][  kuiScan4IdxPlus4], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][  kuiScan4Idx ], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][  kuiScan4IdxPlus4], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
     //cache
-    ST16 (&iRefIndex[0][kuiCacheIdx ], kiRef2);
-    ST16 (&iRefIndex[0][kuiCacheIdxPlus6], kiRef2);
-    ST32 (iMotionVector[0][  kuiCacheIdx ], kiMV32);
-    ST32 (iMotionVector[0][1 + kuiCacheIdx ], kiMV32);
-    ST32 (iMotionVector[0][  kuiCacheIdxPlus6], kiMV32);
-    ST32 (iMotionVector[0][1 + kuiCacheIdxPlus6], kiMV32);
+    ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
+    ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
+    ST32 (iMotionVector[listIdx][  kuiCacheIdx ], kiMV32);
+    ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32);
+    ST32 (iMotionVector[listIdx][  kuiCacheIdxPlus6], kiMV32);
+    ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32);
   }
 }
 //update iRefIndex and iMVs of both Mb and Mb_cache, only for P8x16
 void UpdateP8x16MotionInfo (PDqLayer pCurDqLayer, int16_t iMotionVector[LIST_A][30][MV_A],
                             int8_t iRefIndex[LIST_A][30],
-                            int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
-  const int16_t kiRef2 = (iRef << 8) | iRef;
+                            int32_t listIdx, int32_t iPartIdx, int8_t iRef, int16_t iMVs[2]) {
+  const int16_t kiRef2 = ((uint8_t)iRef << 8) | (uint8_t)iRef;
   const int32_t kiMV32 = LD32 (iMVs);
   int32_t i;
   int32_t iMbXy = pCurDqLayer->iMbXyIndex;
@@ -353,19 +946,19 @@
     const uint8_t kuiCacheIdxPlus6 = 6 + kuiCacheIdx;
 
     //mb
-    ST16 (&pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4Idx ], kiRef2);
-    ST16 (&pCurDqLayer->pRefIndex[0][iMbXy][kuiScan4IdxPlus4], kiRef2);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][  kuiScan4Idx ], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][1 + kuiScan4Idx ], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][  kuiScan4IdxPlus4], kiMV32);
-    ST32 (pCurDqLayer->pMv[0][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4Idx ], kiRef2);
+    ST16 (&pCurDqLayer->pRefIndex[listIdx][iMbXy][kuiScan4IdxPlus4], kiRef2);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][  kuiScan4Idx ], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4Idx ], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][  kuiScan4IdxPlus4], kiMV32);
+    ST32 (pCurDqLayer->pMv[listIdx][iMbXy][1 + kuiScan4IdxPlus4], kiMV32);
     //cache
-    ST16 (&iRefIndex[0][kuiCacheIdx ], kiRef2);
-    ST16 (&iRefIndex[0][kuiCacheIdxPlus6], kiRef2);
-    ST32 (iMotionVector[0][  kuiCacheIdx ], kiMV32);
-    ST32 (iMotionVector[0][1 + kuiCacheIdx ], kiMV32);
-    ST32 (iMotionVector[0][  kuiCacheIdxPlus6], kiMV32);
-    ST32 (iMotionVector[0][1 + kuiCacheIdxPlus6], kiMV32);
+    ST16 (&iRefIndex[listIdx][kuiCacheIdx ], kiRef2);
+    ST16 (&iRefIndex[listIdx][kuiCacheIdxPlus6], kiRef2);
+    ST32 (iMotionVector[listIdx][  kuiCacheIdx ], kiMV32);
+    ST32 (iMotionVector[listIdx][1 + kuiCacheIdx ], kiMV32);
+    ST32 (iMotionVector[listIdx][  kuiCacheIdxPlus6], kiMV32);
+    ST32 (iMotionVector[listIdx][1 + kuiCacheIdxPlus6], kiMV32);
   }
 }
 
--- a/codec/decoder/core/src/parse_mb_syn_cabac.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cabac.cpp
@@ -1,1023 +1,1658 @@
-/*!
- * \copy
- *     Copyright (c)  2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *      parse_mb_syn_cabac.cpp: cabac parse for syntax elements
- */
-#include "parse_mb_syn_cabac.h"
-#include "decode_slice.h"
-#include "mv_pred.h"
-#include "error_code.h"
-namespace WelsDec {
-#define IDX_UNUSED -1
-
-static const int16_t g_kMaxPos       [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 63, 3, 3, 14, 14};
-static const int16_t g_kMaxC2       [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4};
-static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 0, 12, 12, 16, 16};
-static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47};
-static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47};
-static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 0, 30, 30, 39, 39};
-static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0 , 10, 20, 30, 39, 0, 30, 30, 39, 39};
-
-const uint8_t g_kTopBlkInsideMb[24] = { //for index with z-order 0~23
-  //  0   1 | 4  5      luma 8*8 block           pNonZeroCount[16+8]
-  0,  0,  1,  1,   //  2   3 | 6  7        0  |  1                  0   1   2   3
-  0,  0,  1,  1,   //---------------      ---------                 4   5   6   7
-  1,  1,  1,  1,   //  8   9 | 12 13       2  |  3                  8   9  10  11
-  1,  1,  1,  1,  // 10  11 | 14 15-----------------------------> 12  13  14  15
-  0,  0,  1,  1,   //----------------    chroma 8*8 block          16  17  18  19
-  0,  0,  1,  1   // 16  17 | 20 21        0    1                 20  21  22  23
-  // 18  19 | 22 23
-};
-
-const uint8_t g_kLeftBlkInsideMb[24] = { //for index with z-order 0~23
-  //  0   1 | 4  5      luma 8*8 block           pNonZeroCount[16+8]
-  0,  1,  0,  1,   //  2   3 | 6  7        0  |  1                  0   1   2   3
-  1,  1,  1,  1,   //---------------      ---------                 4   5   6   7
-  0,  1,  0,  1,   //  8   9 | 12 13       2  |  3                  8   9  10  11
-  1,  1,  1,  1,  // 10  11 | 14 15-----------------------------> 12  13  14  15
-  0,  1,  0,  1,   //----------------    chroma 8*8 block          16  17  18  19
-  0,  1,  0,  1   // 16  17 | 20 21        0    1                 20  21  22  23
-  // 18  19 | 22 23
-};
-
-void UpdateP16x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
-                             const int8_t iListIdx) {
-  int32_t iRef32Bit = (int32_t) iRef;
-  const int32_t iRef4Bytes = (iRef32Bit << 24) | (iRef32Bit << 16) | (iRef32Bit << 8) | iRef32Bit;
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
-  const uint8_t iScan4Idx4 = 4 + iScan4Idx;
-  const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-  const uint8_t iCacheIdx6 = 6 + iCacheIdx;
-  //mb
-  ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes);
-  ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes);
-  //cache
-  ST32 (&pRefIndex[iListIdx][iCacheIdx ], iRef4Bytes);
-  ST32 (&pRefIndex[iListIdx][iCacheIdx6], iRef4Bytes);
-}
-
-void UpdateP8x16RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
-                             const int8_t iListIdx) {
-  int16_t iRef16Bit = (int16_t) iRef;
-  const int16_t iRef2Bytes = (iRef16Bit << 8) | iRef16Bit;
-  int32_t i;
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  for (i = 0; i < 2; i++, iPartIdx += 8) {
-    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
-    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-    const uint8_t iScan4Idx4 = 4 + iScan4Idx;
-    const uint8_t iCacheIdx6 = 6 + iCacheIdx;
-    //mb
-    ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes);
-    ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes);
-    //cache
-    ST16 (&pRefIndex[iListIdx][iCacheIdx ], iRef2Bytes);
-    ST16 (&pRefIndex[iListIdx][iCacheIdx6], iRef2Bytes);
-  }
-}
-
-void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
-                            const int8_t iListIdx) {
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
-  pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] =
-        pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 5] = iRef;
-}
-
-void UpdateP16x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvd[2], const int8_t iListIdx) {
-  int32_t pMvd32[2];
-  ST32 (&pMvd32[0], LD32 (pMvd));
-  ST32 (&pMvd32[1], LD32 (pMvd));
-  int32_t i;
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  for (i = 0; i < 16; i += 2) {
-    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][i], LD64 (pMvd32));
-  }
-}
-
-void UpdateP16x8MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2],
-                          const int8_t iListIdx) {
-  int32_t pMvd32[2];
-  ST32 (&pMvd32[0], LD32 (pMvd));
-  ST32 (&pMvd32[1], LD32 (pMvd));
-  int32_t i;
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  for (i = 0; i < 2; i++, iPartIdx += 4) {
-    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
-    const uint8_t iScan4Idx4 = 4 + iScan4Idx;
-    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-    const uint8_t iCacheIdx6 = 6 + iCacheIdx;
-    //mb
-    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx ], LD64 (pMvd32));
-    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx4], LD64 (pMvd32));
-    //cache
-    ST64 (pMvdCache[iListIdx][  iCacheIdx ], LD64 (pMvd32));
-    ST64 (pMvdCache[iListIdx][  iCacheIdx6], LD64 (pMvd32));
-  }
-}
-
-void UpdateP8x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2],
-                          const int8_t iListIdx) {
-  int32_t pMvd32[2];
-  ST32 (&pMvd32[0], LD32 (pMvd));
-  ST32 (&pMvd32[1], LD32 (pMvd));
-  int32_t i;
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-
-  for (i = 0; i < 2; i++, iPartIdx += 8) {
-    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
-    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-    const uint8_t iScan4Idx4 = 4 + iScan4Idx;
-    const uint8_t iCacheIdx6 = 6 + iCacheIdx;
-    //mb
-    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx ], LD64 (pMvd32));
-    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx4], LD64 (pMvd32));
-    //cache
-    ST64 (pMvdCache[iListIdx][  iCacheIdx ], LD64 (pMvd32));
-    ST64 (pMvdCache[iListIdx][  iCacheIdx6], LD64 (pMvd32));
-  }
-}
-
-int32_t ParseEndOfSliceCabac (PWelsDecoderContext pCtx, uint32_t& uiBinVal) {
-  uiBinVal = 0;
-  WELS_READ_VERIFY (DecodeTerminateCabac (pCtx->pCabacDecEngine, uiBinVal));
-  return ERR_NONE;
-}
-
-int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip) {
-  uiSkip = 0;
-  int32_t iCtxInc = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_SKIP) + (pNeighAvail->iTopAvail
-                    && pNeighAvail->iTopType  != MB_TYPE_SKIP);
-  PWelsCabacCtx pBinCtx = (pCtx->pCabacCtx + NEW_CTX_OFFSET_SKIP + iCtxInc);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx, uiSkip));
-  return ERR_NONE;
-}
-
-
-int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal) {
-  uint32_t uiCode;
-  int32_t iIdxA = 0, iIdxB = 0;
-  int32_t iCtxInc;
-  uiBinVal = 0;
-  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
-  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MB_TYPE_I; //I mode in I slice
-  iIdxA = (pNeighAvail->iLeftAvail) && (pNeighAvail->iLeftType != MB_TYPE_INTRA4x4
-                                        && pNeighAvail->iLeftType != MB_TYPE_INTRA8x8);
-  iIdxB = (pNeighAvail->iTopAvail) && (pNeighAvail->iTopType != MB_TYPE_INTRA4x4
-                                       && pNeighAvail->iTopType != MB_TYPE_INTRA8x8);
-  iCtxInc = iIdxA + iIdxB;
-  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
-  uiBinVal = uiCode;
-  if (uiBinVal != 0) {  //I16x16
-    WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode));
-    if (uiCode == 1)
-      uiBinVal = 25; //I_PCM
-    else {
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
-      uiBinVal = 1 + uiCode * 12;
-      //decoding of uiCbp:0,1,2
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode));
-      if (uiCode != 0) {
-        WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
-        uiBinVal += 4;
-        if (uiCode != 0)
-          uiBinVal += 4;
-      }
-      //decoding of I pred-mode: 0,1,2,3
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
-      uiBinVal += (uiCode << 1);
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode));
-      uiBinVal += uiCode;
-    }
-  }
-  //I4x4
-  return ERR_NONE;
-}
-
-int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiMbType) {
-  uint32_t uiCode;
-  uiMbType = 0;
-  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
-
-  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SKIP;
-  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
-  if (uiCode) {
-    // Intra MB
-    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
-    if (uiCode) { // Intra 16x16
-      WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode));
-      if (uiCode) {
-        uiMbType = 30;
-        return ERR_NONE;//MB_TYPE_INTRA_PCM;
-      }
-
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode));
-      uiMbType = 6 + uiCode * 12;
-
-      //uiCbp: 0,1,2
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode));
-      if (uiCode) {
-        uiMbType += 4;
-        WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode));
-        if (uiCode)
-          uiMbType += 4;
-      }
-
-      //IPredMode: 0,1,2,3
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode));
-      uiMbType += (uiCode << 1);
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode));
-      uiMbType += uiCode;
-    } else
-      // Intra 4x4
-      uiMbType = 5;
-  } else { // P MB
-    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode));
-    if (uiCode) { //second bit
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
-      if (uiCode)
-        uiMbType = 1;
-      else
-        uiMbType = 2;
-    } else {
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
-      if (uiCode)
-        uiMbType = 3;
-      else
-        uiMbType = 0;
-    }
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail,
-                                        bool& bTransformSize8x8Flag) {
-  uint32_t uiCode;
-  int32_t iIdxA, iIdxB;
-  int32_t iCtxInc;
-  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
-  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_TS_8x8_FLAG;
-  iIdxA = (pNeighAvail->iLeftAvail) && (pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - 1]);
-  iIdxB = (pNeighAvail->iTopAvail)
-          && (pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - pCtx->pCurDqLayer->iMbWidth]);
-  iCtxInc = iIdxA + iIdxB;
-  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
-  bTransformSize8x8Flag = !!uiCode;
-
-  return ERR_NONE;
-}
-
-int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) {
-  uint32_t uiCode;
-  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
-  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SUBMB_TYPE;
-  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode));
-  if (uiCode)
-    uiSubMbType = 0;
-  else {
-    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode));
-    if (uiCode) {
-      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode));
-      uiSubMbType = 3 - uiCode;
-    } else {
-      uiSubMbType = 1;
-    }
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal) {
-  uint32_t uiCode;
-  iBinVal = 0;
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR, uiCode));
-  if (uiCode == 1)
-    iBinVal = -1;
-  else {
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode));
-    iBinVal |= uiCode;
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode));
-    iBinVal |= (uiCode << 1);
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode));
-    iBinVal |= (uiCode << 2);
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal) {
-  uint32_t uiCode;
-  int32_t iIdxA, iIdxB, iCtxInc;
-  int8_t* pChromaPredMode = pCtx->pCurDqLayer->pChromaPredMode;
-  int16_t* pMbType = pCtx->pCurDqLayer->pMbType;
-  int32_t iLeftAvail     = uiNeighAvail & 0x04;
-  int32_t iTopAvail      = uiNeighAvail & 0x01;
-
-  int32_t iMbXy = pCtx->pCurDqLayer->iMbXyIndex;
-  int32_t iMbXyTop = iMbXy - pCtx->pCurDqLayer->iMbWidth;
-  int32_t iMbXyLeft = iMbXy - 1;
-
-  iBinVal = 0;
-
-  iIdxB = iTopAvail  && (pChromaPredMode[iMbXyTop] > 0 && pChromaPredMode[iMbXyTop] <= 3)
-          && pMbType[iMbXyTop]  != MB_TYPE_INTRA_PCM;
-  iIdxA = iLeftAvail && (pChromaPredMode[iMbXyLeft] > 0 && pChromaPredMode[iMbXyLeft] <= 3)
-          && pMbType[iMbXyLeft] != MB_TYPE_INTRA_PCM;
-  iCtxInc = iIdxA + iIdxB;
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + iCtxInc, uiCode));
-  iBinVal = uiCode;
-  if (iBinVal != 0) {
-    uint32_t iSym;
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, iSym));
-    if (iSym == 0) {
-      iBinVal = (iSym + 1);
-      return ERR_NONE;
-    }
-    iSym = 0;
-    do {
-      WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, uiCode));
-      ++iSym;
-    } while ((uiCode != 0) && (iSym < 1));
-
-    if ((uiCode != 0) && (iSym == 1))
-      ++ iSym;
-    iBinVal = (iSym + 1);
-    return ERR_NONE;
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseInterMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
-                                   int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]) {
-  PSlice pSlice                 = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
-  PSliceHeader pSliceHeader     = &pSlice->sSliceHeaderExt.sSliceHeader;
-  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
-  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0];
-  int32_t pRefCount[2];
-  int32_t i, j;
-  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
-  int16_t pMv[4] = {0};
-  int16_t pMvd[4] = {0};
-  int8_t iRef[2] = {0};
-  int32_t iPartIdx;
-  int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv;
-  int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv;
-  pRefCount[0] = pSliceHeader->uiRefCount[0];
-  pRefCount[1] = pSliceHeader->uiRefCount[1];
-
-  switch (pCurDqLayer->pMbType[iMbXy]) {
-  case MB_TYPE_16x16: {
-    iPartIdx = 0;
-    WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iPartIdx, pRefCount[0], 0,
-                                        iRef[0]));
-    if ((iRef[0] < 0) || (iRef[0] >= pRefCount[0]) || (ppRefPic[iRef[0]] == NULL)) { //error ref_idx
-      pCtx->bMbRefConcealed = true;
-      if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-        iRef[0] = 0;
-        pCtx->iErrorCode |= dsBitstreamError;
-      } else {
-        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
-      }
-    }
-    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[0]]
-                            && ppRefPic[iRef[0]]->bIsComplete);
-    PredMv (pMotionVector, pRefIndex, 0, 4, iRef[0], pMv);
-    WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
-    WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
-    pMv[0] += pMvd[0];
-    pMv[1] += pMvd[1];
-    WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
-    UpdateP16x16MotionInfo (pCurDqLayer, iRef[0], pMv);
-    UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0);
-  }
-  break;
-  case MB_TYPE_16x8:
-    for (i = 0; i < 2; i++) {
-      iPartIdx = i << 3;
-      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iPartIdx, pRefCount[0], 0,
-                                          iRef[i]));
-      if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx
-        pCtx->bMbRefConcealed = true;
-        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-          iRef[i] = 0;
-          pCtx->iErrorCode |= dsBitstreamError;
-        } else {
-          return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
-        }
-      }
-      pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
-                              && ppRefPic[iRef[i]]->bIsComplete);
-      UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
-    }
-    for (i = 0; i < 2; i++) {
-      iPartIdx = i << 3;
-      PredInter16x8Mv (pMotionVector, pRefIndex, iPartIdx, iRef[i], pMv);
-      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
-      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
-      pMv[0] += pMvd[0];
-      pMv[1] += pMvd[1];
-      WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
-      UpdateP16x8MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, iPartIdx, iRef[i], pMv);
-      UpdateP16x8MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0);
-    }
-    break;
-  case MB_TYPE_8x16:
-    for (i = 0; i < 2; i++) {
-      iPartIdx = i << 2;
-      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iPartIdx, pRefCount[0], 0,
-                                          iRef[i]));
-      if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx
-        pCtx->bMbRefConcealed = true;
-        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-          iRef[i] = 0;
-          pCtx->iErrorCode |= dsBitstreamError;
-        } else {
-          return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
-        }
-      }
-      pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
-                              && ppRefPic[iRef[i]]->bIsComplete);
-      UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
-    }
-    for (i = 0; i < 2; i++) {
-      iPartIdx = i << 2;
-      PredInter8x16Mv (pMotionVector, pRefIndex, i << 2, iRef[i], pMv/*&mv[0], &mv[1]*/);
-
-      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
-      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
-      pMv[0] += pMvd[0];
-      pMv[1] += pMvd[1];
-      WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
-      UpdateP8x16MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, iPartIdx, iRef[i], pMv);
-      UpdateP8x16MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0);
-    }
-    break;
-  case MB_TYPE_8x8:
-  case MB_TYPE_8x8_REF0: {
-    int8_t pRefIdx[4] = {0}, pSubPartCount[4], pPartW[4];
-    uint32_t uiSubMbType;
-    //sub_mb_type, partition
-    for (i = 0; i < 4; i++) {
-      WELS_READ_VERIFY (ParseSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType));
-      if (uiSubMbType >= 4) { //invalid sub_mb_type
-        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE);
-      }
-      pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterSubMbTypeInfo[uiSubMbType].iType;
-      pSubPartCount[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartCount;
-      pPartW[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartWidth;
-
-      // Need modification when B picture add in, reference to 7.3.5
-      pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0);
-    }
-
-    for (i = 0; i < 4; i++) {
-      int16_t iIdx8 = i << 2;
-      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, LIST_0, iIdx8, pRefCount[0], 1,
-                                          pRefIdx[i]));
-      if ((pRefIdx[i] < 0) || (pRefIdx[i] >= pRefCount[0]) || (ppRefPic[pRefIdx[i]] == NULL)) { //error ref_idx
-        pCtx->bMbRefConcealed = true;
-        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
-          pRefIdx[i] = 0;
-          pCtx->iErrorCode |= dsBitstreamError;
-        } else {
-          return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
-        }
-      }
-      pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[pRefIdx[i]]
-                              && ppRefPic[pRefIdx[i]]->bIsComplete);
-      UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, pRefIdx[i], LIST_0);
-    }
-    //mv
-    for (i = 0; i < 4; i++) {
-      int8_t iPartCount = pSubPartCount[i];
-      uiSubMbType = pCurDqLayer->pSubMbType[iMbXy][i];
-      int16_t iPartIdx, iBlockW = pPartW[i];
-      uint8_t iScan4Idx, iCacheIdx;
-      iCacheIdx = g_kuiCache30ScanIdx[i << 2];
-      pRefIndex[0][iCacheIdx ] = pRefIndex[0][iCacheIdx + 1]
-                                 = pRefIndex[0][iCacheIdx + 6] = pRefIndex[0][iCacheIdx + 7] = pRefIdx[i];
-
-      for (j = 0; j < iPartCount; j++) {
-        iPartIdx = (i << 2) + j * iBlockW;
-        iScan4Idx = g_kuiScan4[iPartIdx];
-        iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-        PredMv (pMotionVector, pRefIndex, iPartIdx, iBlockW, pRefIdx[i], pMv);
-        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
-        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
-        pMv[0] += pMvd[0];
-        pMv[1] += pMvd[1];
-        WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
-        if (SUB_MB_TYPE_8x8 == uiSubMbType) {
-          ST32 ((pMv + 2), LD32 (pMv));
-          ST32 ((pMvd + 2), LD32 (pMvd));
-          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx], LD64 (pMv));
-          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv));
-          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx], LD64 (pMvd));
-          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD64 (pMvd));
-          ST64 (pMotionVector[0][iCacheIdx  ], LD64 (pMv));
-          ST64 (pMotionVector[0][iCacheIdx + 6], LD64 (pMv));
-          ST64 (pMvdCache[0][iCacheIdx  ], LD64 (pMvd));
-          ST64 (pMvdCache[0][iCacheIdx + 6], LD64 (pMvd));
-        } else if (SUB_MB_TYPE_8x4 == uiSubMbType) {
-          ST32 ((pMv + 2), LD32 (pMv));
-          ST32 ((pMvd + 2), LD32 (pMvd));
-          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD64 (pMv));
-          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD64 (pMvd));
-          ST64 (pMotionVector[0][iCacheIdx  ], LD64 (pMv));
-          ST64 (pMvdCache[0][iCacheIdx  ], LD64 (pMvd));
-        } else if (SUB_MB_TYPE_4x8 == uiSubMbType) {
-          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD32 (pMv));
-          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv));
-          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD32 (pMvd));
-          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD32 (pMvd));
-          ST32 (pMotionVector[0][iCacheIdx  ], LD32 (pMv));
-          ST32 (pMotionVector[0][iCacheIdx + 6], LD32 (pMv));
-          ST32 (pMvdCache[0][iCacheIdx  ], LD32 (pMvd));
-          ST32 (pMvdCache[0][iCacheIdx + 6], LD32 (pMvd));
-        } else {  //SUB_MB_TYPE_4x4
-          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD32 (pMv));
-          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD32 (pMvd));
-          ST32 (pMotionVector[0][iCacheIdx  ], LD32 (pMv));
-          ST32 (pMvdCache[0][iCacheIdx  ], LD32 (pMvd));
-        }
-      }
-    }
-  }
-  break;
-  default:
-    break;
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* nzc,
-                          int8_t ref_idx[LIST_A][30],
-                          int32_t iListIdx, int32_t iZOrderIdx, int32_t iActiveRefNum, int32_t b8mode, int8_t& iRefIdxVal) {
-  if (iActiveRefNum == 1) {
-    iRefIdxVal = 0;
-    return ERR_NONE;
-  }
-  uint32_t uiCode;
-  int32_t iIdxA = 0, iIdxB = 0;
-  int32_t iCtxInc;
-  int8_t* pRefIdxInMB = pCtx->pCurDqLayer->pRefIndex[LIST_0][pCtx->pCurDqLayer->iMbXyIndex];
-  if (iZOrderIdx == 0) {
-    iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM
-             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 6] > 0);
-    iIdxA = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
-             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 1] > 0);
-  } else if (iZOrderIdx == 4) {
-    iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM
-             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 6] > 0);
-    iIdxA = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 1] > 0;
-  } else if (iZOrderIdx == 8) {
-    iIdxB = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 4] > 0;
-    iIdxA = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
-             && ref_idx[iListIdx][g_kuiCache30ScanIdx[iZOrderIdx] - 1] > 0);
-  } else {
-    iIdxB = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 4] > 0;
-    iIdxA = pRefIdxInMB[g_kuiScan4[iZOrderIdx] - 1] > 0;
-  }
-
-  iCtxInc = iIdxA + (iIdxB << 1);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_REF_NO + iCtxInc, uiCode));
-  if (uiCode) {
-    WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_REF_NO + 4, 1, uiCode));
-    ++uiCode;
-  }
-  iRefIdxVal = (int8_t) uiCode;
-  return ERR_NONE;
-}
-
-int32_t ParseMvdInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t pRefIndex[LIST_A][30],
-                           int16_t pMvdCache[LIST_A][30][2], int32_t index, int8_t iListIdx, int8_t iMvComp, int16_t& iMvdVal) {
-  uint32_t uiCode;
-  int32_t iIdxA = 0;
-  //int32_t sym;
-  int32_t iCtxInc;
-  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MVD + iMvComp * CTX_NUM_MVD;
-  iMvdVal = 0;
-  if (pRefIndex[iListIdx][g_kuiCache30ScanIdx[index] - 6] >= 0)
-    iIdxA = WELS_ABS (pMvdCache[iListIdx][g_kuiCache30ScanIdx[index] - 6][iMvComp]);
-  if (pRefIndex[iListIdx][g_kuiCache30ScanIdx[index] - 1] >= 0)
-    iIdxA += WELS_ABS (pMvdCache[iListIdx][g_kuiCache30ScanIdx[index] - 1][iMvComp]);
-
-  if (iIdxA < 3)
-    iCtxInc = 0;
-  else if (iIdxA > 32)
-    iCtxInc = 2;
-  else
-    iCtxInc = 1;
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,  pBinCtx + iCtxInc, uiCode));
-  if (uiCode) {
-    WELS_READ_VERIFY (DecodeUEGMvCabac (pCtx->pCabacDecEngine, pBinCtx + 3, 3, uiCode));
-    iMvdVal = (int16_t) (uiCode + 1);
-    WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiCode));
-    if (uiCode) {
-      iMvdVal = -iMvdVal;
-    }
-  } else {
-    iMvdVal = 0;
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseCbpInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiCbp) {
-  int32_t iIdxA = 0, iIdxB = 0, pALeftMb[2], pBTopMb[2];
-  uiCbp = 0;
-  uint32_t pCbpBit[6];
-  int32_t iCtxInc;
-
-  //Luma: bit by bit for 4 8x8 blocks in z-order
-  pBTopMb[0]  = pNeighAvail->iTopAvail  && pNeighAvail->iTopType  != MB_TYPE_INTRA_PCM
-                && ((pNeighAvail->iTopCbp  & (1 << 2)) == 0);
-  pBTopMb[1]  = pNeighAvail->iTopAvail  && pNeighAvail->iTopType  != MB_TYPE_INTRA_PCM
-                && ((pNeighAvail->iTopCbp  & (1 << 3)) == 0);
-  pALeftMb[0] = pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
-                && ((pNeighAvail->iLeftCbp & (1 << 1)) == 0);
-  pALeftMb[1] = pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
-                && ((pNeighAvail->iLeftCbp & (1 << 3)) == 0);
-
-  //left_top 8x8 block
-  iCtxInc = pALeftMb[0] + (pBTopMb[0] << 1);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[0]));
-  if (pCbpBit[0])
-    uiCbp += 0x01;
-
-  //right_top 8x8 block
-  iIdxA = !pCbpBit[0];
-  iCtxInc = iIdxA + (pBTopMb[1] << 1);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[1]));
-  if (pCbpBit[1])
-    uiCbp += 0x02;
-
-  //left_bottom 8x8 block
-  iIdxB = !pCbpBit[0];
-  iCtxInc = pALeftMb[1] + (iIdxB << 1);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[2]));
-  if (pCbpBit[2])
-    uiCbp += 0x04;
-
-  //right_bottom 8x8 block
-  iIdxB = !pCbpBit[1];
-  iIdxA = !pCbpBit[2];
-  iCtxInc = iIdxA + (iIdxB << 1);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, pCbpBit[3]));
-  if (pCbpBit[3])
-    uiCbp += 0x08;
-
-  if (pCtx->pSps->uiChromaFormatIdc == 0)//monochroma
-    return ERR_NONE;
-
-
-  //Chroma: bit by bit
-  iIdxB = pNeighAvail->iTopAvail  && (pNeighAvail->iTopType  == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp  >> 4));
-  iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4));
-
-  //BitIdx = 0
-  iCtxInc = iIdxA + (iIdxB << 1);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + CTX_NUM_CBP + iCtxInc,
-                                    pCbpBit[4]));
-
-  //BitIdx = 1
-  if (pCbpBit[4]) {
-    iIdxB = pNeighAvail->iTopAvail  && (pNeighAvail->iTopType  == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp  >> 4) == 2);
-    iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4) == 2);
-    iCtxInc = iIdxA + (iIdxB << 1);
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
-                                      pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + 2 * CTX_NUM_CBP + iCtxInc,
-                                      pCbpBit[5]));
-    uiCbp += 1 << (4 + pCbpBit[5]);
-
-  }
-
-  return ERR_NONE;
-}
-
-int32_t ParseDeltaQpCabac (PWelsDecoderContext pCtx, int32_t& iQpDelta) {
-  uint32_t uiCode;
-  PSlice pCurrSlice = & (pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer);
-  iQpDelta = 0;
-  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_DELTA_QP;
-  int32_t iCtxInc = (pCurrSlice->iLastDeltaQp != 0);
-  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
-  if (uiCode != 0) {
-    WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pBinCtx + 2, 1, uiCode));
-    uiCode++;
-    iQpDelta = (uiCode + 1) >> 1;
-    if ((uiCode & 1) == 0)
-      iQpDelta = - iQpDelta;
-  }
-  pCurrSlice->iLastDeltaQp = iQpDelta;
-  return ERR_NONE;
-}
-
-int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int32_t iZIndex, int32_t iResProperty,
-                           PWelsDecoderContext pCtx, uint32_t& uiCbfBit) {
-  int8_t nA, nB/*, zigzag_idx = 0*/;
-  int32_t iCurrBlkXy = pCtx->pCurDqLayer->iMbXyIndex;
-  int32_t iTopBlkXy = iCurrBlkXy - pCtx->pCurDqLayer->iMbWidth; //default value: MB neighboring
-  int32_t iLeftBlkXy = iCurrBlkXy - 1; //default value: MB neighboring
-  uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
-  int16_t* pMbType = pCtx->pCurDqLayer->pMbType;
-  int32_t iCtxInc;
-  uiCbfBit = 0;
-  nA = nB = (int8_t)!!IS_INTRA (pMbType[iCurrBlkXy]);
-
-  if (iResProperty == I16_LUMA_DC || iResProperty == CHROMA_DC_U || iResProperty == CHROMA_DC_V) { //DC
-    if (pNeighAvail->iTopAvail)
-      nB = (pMbType[iTopBlkXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iTopBlkXy] >> iResProperty) & 1);
-    if (pNeighAvail->iLeftAvail)
-      nA = (pMbType[iLeftBlkXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iLeftBlkXy] >> iResProperty) & 1);
-    iCtxInc = nA + (nB << 1);
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
-                                      pCtx->pCabacCtx + NEW_CTX_OFFSET_CBF + g_kBlockCat2CtxOffsetCBF[iResProperty] + iCtxInc, uiCbfBit));
-    if (uiCbfBit)
-      pCbfDc[iCurrBlkXy] |= (1 << iResProperty);
-  } else { //AC
-    //for 4x4 blk, make sure blk-idx is correct
-    if (pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 8] != 0xff) { //top blk available
-      if (g_kTopBlkInsideMb[iZIndex])
-        iTopBlkXy = iCurrBlkXy;
-      nB = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 8] || pMbType[iTopBlkXy]  == MB_TYPE_INTRA_PCM;
-    }
-    if (pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 1] != 0xff) { //left blk available
-      if (g_kLeftBlkInsideMb[iZIndex])
-        iLeftBlkXy = iCurrBlkXy;
-      nA = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 1] || pMbType[iLeftBlkXy] == MB_TYPE_INTRA_PCM;
-    }
-
-    iCtxInc = nA + (nB << 1);
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
-                                      pCtx->pCabacCtx + NEW_CTX_OFFSET_CBF + g_kBlockCat2CtxOffsetCBF[iResProperty] + iCtxInc, uiCbfBit));
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx,
-                                  uint32_t& uiCoeffNum) {
-  uint32_t uiCode;
-
-  PWelsCabacCtx pMapCtx  = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_MAP_8x8 : NEW_CTX_OFFSET_MAP)
-                           + g_kBlockCat2CtxOffsetMap [iResProperty];
-  PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_LAST_8x8 :
-                           NEW_CTX_OFFSET_LAST) + g_kBlockCat2CtxOffsetLast[iResProperty];
-
-
-  int32_t i;
-  uiCoeffNum = 0;
-  int32_t i0 = 0;
-  int32_t i1 = g_kMaxPos[iResProperty];
-
-  int32_t iCtx;
-
-  for (i = i0; i < i1; ++i) {
-    iCtx = (iResProperty == LUMA_DC_AC_8 ? g_kuiIdx2CtxSignificantCoeffFlag8x8[i] : i);
-    //read significant
-    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + iCtx, uiCode));
-    if (uiCode) {
-      * (pSignificantMap++) = 1;
-      ++ uiCoeffNum;
-      //read last significant
-      iCtx = (iResProperty == LUMA_DC_AC_8 ? g_kuiIdx2CtxLastSignificantCoeffFlag8x8[i] : i);
-      WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + iCtx, uiCode));
-      if (uiCode) {
-        memset (pSignificantMap, 0, (i1 - i) * sizeof (int32_t));
-        return ERR_NONE;
-      }
-    } else
-      * (pSignificantMap++) = 0;
-  }
-
-  //deal with last pSignificantMap if no data
-  //if(i < i1+1)
-  {
-    *pSignificantMap = 1;
-    ++uiCoeffNum;
-  }
-
-  return ERR_NONE;
-}
-
-int32_t ParseSignificantCoeffCabac (int32_t* pSignificant, int32_t iResProperty, PWelsDecoderContext pCtx) {
-  uint32_t uiCode;
-  PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ONE_8x8 : NEW_CTX_OFFSET_ONE) +
-                          g_kBlockCat2CtxOffsetOne[iResProperty];
-  PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + (iResProperty == LUMA_DC_AC_8 ? NEW_CTX_OFFSET_ABS_8x8 : NEW_CTX_OFFSET_ABS) +
-                          g_kBlockCat2CtxOffsetAbs[iResProperty];
-
-  const int16_t iMaxType = g_kMaxC2[iResProperty];
-  int32_t i = g_kMaxPos[iResProperty];
-  int32_t* pCoff = pSignificant + i;
-  int32_t c1 = 1;
-  int32_t c2 = 0;
-  for (; i >= 0; --i) {
-    if (*pCoff != 0) {
-      WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pOneCtx + c1, uiCode));
-      *pCoff += uiCode;
-      if (*pCoff == 2) {
-        WELS_READ_VERIFY (DecodeUEGLevelCabac (pCtx->pCabacDecEngine, pAbsCtx + c2, uiCode));
-        *pCoff += uiCode;
-        ++c2;
-        c2 = WELS_MIN (c2, iMaxType);
-        c1 = 0;
-      } else if (c1) {
-        ++c1;
-        c1 = WELS_MIN (c1, 4);
-      }
-      WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiCode));
-      if (uiCode)
-        *pCoff = - *pCoff;
-    }
-    pCoff--;
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
-                                    int32_t iIndex, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty,
-                                    short* sTCoeff, /*int mb_mode*/ uint8_t uiQp, PWelsDecoderContext pCtx) {
-  uint32_t uiTotalCoeffNum = 0;
-  uint32_t uiCbpBit;
-  int32_t pSignificantMap[64] = {0};
-
-  int32_t iMbResProperty = 0;
-  GetMbResProperty (&iMbResProperty, &iResProperty, false);
-  const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] :
-                                g_kuiDequantCoeff8x8[uiQp];
-
-  uiCbpBit = 1; // for 8x8, MaxNumCoeff == 64 && uiCbpBit == 1
-  if (uiCbpBit) { //has coeff
-    WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum));
-    WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx));
-  }
-
-  pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex]] =
-    pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 1]] =
-      pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 2]] =
-        pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 3]] = (uint8_t)uiTotalCoeffNum;
-  if (uiTotalCoeffNum == 0) {
-    return ERR_NONE;
-  }
-  int32_t j = 0, i;
-  if (iResProperty == LUMA_DC_AC_8) {
-    do {
-      if (pSignificantMap[j] != 0) {
-        i = pScanTable[ j ];
-        sTCoeff[i] = uiQp >= 36 ? ((pSignificantMap[j] * pDeQuantMul[i]) * (1 << (uiQp / 6 - 6))) : ((
-                       pSignificantMap[j] * pDeQuantMul[i] + (1 << (5 - uiQp / 6))) >> (6 - uiQp / 6));
-      }
-      ++j;
-    } while (j < 64);
-  }
-
-  return ERR_NONE;
-}
-
-int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
-                                 int32_t iIndex, int32_t iMaxNumCoeff,
-                                 const uint8_t* pScanTable, int32_t iResProperty, short* sTCoeff, /*int mb_mode*/ uint8_t uiQp,
-                                 PWelsDecoderContext pCtx) {
-  int32_t iCurNzCacheIdx;
-  uint32_t uiTotalCoeffNum = 0;
-  uint32_t uiCbpBit;
-  int32_t pSignificantMap[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
-
-  int32_t iMbResProperty = 0;
-  GetMbResProperty (&iMbResProperty, &iResProperty, false);
-  const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? pCtx->pDequant_coeff4x4[iMbResProperty][uiQp] :
-                                g_kuiDequantCoeff[uiQp];
-
-  WELS_READ_VERIFY (ParseCbfInfoCabac (pNeighAvail, pNonZeroCountCache, iIndex, iResProperty, pCtx, uiCbpBit));
-  if (uiCbpBit) { //has coeff
-    WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum));
-    WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx));
-  }
-
-  iCurNzCacheIdx = g_kCacheNzcScanIdx[iIndex];
-  pNonZeroCountCache[iCurNzCacheIdx] = (uint8_t)uiTotalCoeffNum;
-  if (uiTotalCoeffNum == 0) {
-    return ERR_NONE;
-  }
-  int32_t j = 0;
-  if (iResProperty == I16_LUMA_DC) {
-    do {
-      sTCoeff[pScanTable[j]] = pSignificantMap[j];
-      ++j;
-    } while (j < 16);
-    WelsLumaDcDequantIdct (sTCoeff, uiQp, pCtx);
-  } else if (iResProperty == CHROMA_DC_U || iResProperty == CHROMA_DC_V) {
-    do {
-      sTCoeff[pScanTable[j]] = pSignificantMap[j];
-      ++j;
-    } while (j < 4);
-    //iHadamard2x2
-    WelsChromaDcIdct (sTCoeff);
-    //scaling
-    if (!pCtx->bUseScalingList) {
-      for (j = 0; j < 4; ++j) {
-        sTCoeff[pScanTable[j]] = (int16_t) ((int64_t)sTCoeff[pScanTable[j]] * (int64_t)pDeQuantMul[0] >> 1);
-      }
-    } else { //with scaling list
-      for (j = 0; j < 4; ++j) {
-        sTCoeff[pScanTable[j]] = (int16_t) ((int64_t)sTCoeff[pScanTable[j]] * (int64_t)pDeQuantMul[0] >> 5);
-      }
-    }
-  } else { //luma ac, chroma ac
-    do {
-      if (pSignificantMap[j] != 0) {
-        if (!pCtx->bUseScalingList) {
-          sTCoeff[pScanTable[j]] = pSignificantMap[j] * pDeQuantMul[pScanTable[j] & 0x07];
-        } else {
-          sTCoeff[pScanTable[j]] = (int16_t) (((int64_t)pSignificantMap[j] * (int64_t)pDeQuantMul[pScanTable[j]] + 8) >> 4);
-        }
-      }
-      ++j;
-    } while (j < 16);
-  }
-  return ERR_NONE;
-}
-
-int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
-  int32_t i;
-  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
-  SBitStringAux* pBsAux = pCtx->pCurDqLayer->pBitStringAux;
-  SDqLayer* pCurLayer = pCtx->pCurDqLayer;
-  int32_t iDstStrideLuma = pCurLayer->pDec->iLinesize[0];
-  int32_t iDstStrideChroma = pCurLayer->pDec->iLinesize[1];
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-  int32_t iMbXy = pCurLayer->iMbXyIndex;
-
-  int32_t iMbOffsetLuma = (iMbX + iMbY * iDstStrideLuma) << 4;
-  int32_t iMbOffsetChroma = (iMbX + iMbY * iDstStrideChroma) << 3;
-
-  uint8_t* pMbDstY = pCtx->pDec->pData[0] + iMbOffsetLuma;
-  uint8_t* pMbDstU = pCtx->pDec->pData[1] + iMbOffsetChroma;
-  uint8_t* pMbDstV = pCtx->pDec->pData[2] + iMbOffsetChroma;
-
-  uint8_t* pPtrSrc;
-
-  pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
-  RestoreCabacDecEngineToBS (pCabacDecEngine, pBsAux);
-  intX_t iBytesLeft = pBsAux->pEndBuf - pBsAux->pCurBuf;
-  if (iBytesLeft < 384) {
-    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_CABAC_NO_BS_TO_READ);
-  }
-  pPtrSrc = pBsAux->pCurBuf;
-  if (!pCtx->pParam->bParseOnly) {
-    for (i = 0; i < 16; i++) {   //luma
-      memcpy (pMbDstY , pPtrSrc, 16);
-      pMbDstY += iDstStrideLuma;
-      pPtrSrc += 16;
-    }
-    for (i = 0; i < 8; i++) {   //cb
-      memcpy (pMbDstU, pPtrSrc, 8);
-      pMbDstU += iDstStrideChroma;
-      pPtrSrc += 8;
-    }
-    for (i = 0; i < 8; i++) {   //cr
-      memcpy (pMbDstV, pPtrSrc, 8);
-      pMbDstV += iDstStrideChroma;
-      pPtrSrc += 8;
-    }
-  }
-
-  pBsAux->pCurBuf += 384;
-
-  pCurLayer->pLumaQp[iMbXy] = 0;
-  pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
-  memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy]));
-
-  //step 4: cabac engine init
-  WELS_READ_VERIFY (InitReadBits (pBsAux, 1));
-  WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCabacDecEngine, pBsAux));
-  return ERR_NONE;
-}
-}
+/*!
+ * \copy
+ *     Copyright (c)  2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *      parse_mb_syn_cabac.cpp: cabac parse for syntax elements
+ */
+#include "parse_mb_syn_cabac.h"
+#include "decode_slice.h"
+#include "mv_pred.h"
+#include "error_code.h"
+#include <stdio.h>
+namespace WelsDec {
+#define IDX_UNUSED -1
+
+static const int16_t g_kMaxPos       [] = {IDX_UNUSED, 15, 14, 15, 3, 14, 63, 3, 3, 14, 14}; // highest coefficient position per residual category (presumably indexed by iResProperty); slot 0 is unused
+static const int16_t g_kMaxC2       [] = {IDX_UNUSED, 4, 4, 4, 3, 4, 4, 3, 3, 4, 4}; // per-category upper clamp; NOTE(review): presumably caps the abs-level context increment - confirm at the use site
+static const int16_t g_kBlockCat2CtxOffsetCBF[] = {IDX_UNUSED, 0, 4, 8, 12, 16, 0, 12, 12, 16, 16}; // per-category offset into the coded-block-flag contexts
+static const int16_t g_kBlockCat2CtxOffsetMap [] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47}; // per-category offset into the significant-coefficient-map contexts
+static const int16_t g_kBlockCat2CtxOffsetLast[] = {IDX_UNUSED, 0, 15, 29, 44, 47, 0, 44, 44, 47, 47}; // per-category offset into the last-significant-coefficient contexts (same values as the Map table)
+static const int16_t g_kBlockCat2CtxOffsetOne [] = {IDX_UNUSED, 0, 10, 20, 30, 39, 0, 30, 30, 39, 39}; // per-category offset into the "one" (level greater than 1) contexts
+static const int16_t g_kBlockCat2CtxOffsetAbs [] = {IDX_UNUSED, 0, 10, 20, 30, 39, 0, 30, 30, 39, 39}; // per-category offset into the absolute-level contexts (same values as the One table)
+
+const uint8_t g_kTopBlkInsideMb[24] = { // indexed by z-order 0~23: 1 when the block above lies inside the current MB, 0 when it belongs to the MB above
+  //  0   1 | 4  5      luma 8*8 block           pNonZeroCount[16+8]
+  0,  0,  1,  1,   //  2   3 | 6  7        0  |  1                  0   1   2   3
+  0,  0,  1,  1,   //---------------      ---------                 4   5   6   7
+  1,  1,  1,  1,   //  8   9 | 12 13       2  |  3                  8   9  10  11
+  1,  1,  1,  1,  // 10  11 | 14 15-----------------------------> 12  13  14  15
+  0,  0,  1,  1,   //----------------    chroma 8*8 block          16  17  18  19
+  0,  0,  1,  1   // 16  17 | 20 21        0    1                 20  21  22  23
+  // 18  19 | 22 23
+};
+
+const uint8_t g_kLeftBlkInsideMb[24] = { // indexed by z-order 0~23: 1 when the block to the left lies inside the current MB, 0 when it belongs to the MB on the left
+  //  0   1 | 4  5      luma 8*8 block           pNonZeroCount[16+8]
+  0,  1,  0,  1,   //  2   3 | 6  7        0  |  1                  0   1   2   3
+  1,  1,  1,  1,   //---------------      ---------                 4   5   6   7
+  0,  1,  0,  1,   //  8   9 | 12 13       2  |  3                  8   9  10  11
+  1,  1,  1,  1,  // 10  11 | 14 15-----------------------------> 12  13  14  15
+  0,  1,  0,  1,   //----------------    chroma 8*8 block          16  17  18  19
+  0,  1,  0,  1   // 16  17 | 20 21        0    1                 20  21  22  23
+  // 18  19 | 22 23
+};
+
+static uint32_t DecodeCabacIntraMbType (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int ctx_base) {
+  uint32_t uiCode;
+  uint32_t uiMbType = 0;
+  // Decodes an intra mb_type with the contexts starting at ctx_base: 0 = I4x4, 1..24 = I16x16 variants, 25 = PCM.
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + ctx_base;
+  // NOTE(review): WELS_READ_VERIFY presumably returns the decoder error code, which shares this uint32_t return value - confirm callers cope.
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode));
+  if (!uiCode) {
+    return 0; /* I4x4 */
+  }
+  // The terminate bin is verified like every other decode call here instead of silently ignoring a read failure.
+  WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode));
+  if (uiCode) {
+    return 25; /* PCM */
+  }
+  uiMbType = 1; /* I16x16 */
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode)); /* cbp_luma != 0 */
+  uiMbType += 12 * uiCode;
+  // Chroma cbp: the first bin adds 4, the second bin (same context) adds another 4.
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode));
+  if (uiCode) {
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode));
+    uiMbType += 4 + 4 * uiCode;
+  }
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); // high bit of the two-bit value added last
+  uiMbType += 2 * uiCode;
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); // low bit, decoded with the same context
+  uiMbType += 1 * uiCode;
+  return uiMbType;
+}
+
+void UpdateP16x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
+                             const int8_t iListIdx) { // writes iRef for the partition at iPartIdx into both the layer ref-index array and the 30-entry cache
+  const uint32_t iRef32Bit = (uint8_t) iRef; // keep only the low byte so a negative iRef is not sign-extended into the neighbouring bytes
+  const int32_t iRef4Bytes = (iRef32Bit << 24) | (iRef32Bit << 16) | (iRef32Bit << 8) | iRef32Bit; // iRef replicated into all four bytes
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+  const uint8_t iScan4Idx4 = 4 + iScan4Idx; // same column, 4 entries further on in the per-MB array
+  const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+  const uint8_t iCacheIdx6 = 6 + iCacheIdx; // same column, 6 entries further on in the cache
+  //mb: two 4-byte stores (ST32 is presumably a 32-bit store macro) into the per-MB ref-index array
+  ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef4Bytes);
+  ST32 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef4Bytes);
+  //cache: the same two stores into the neighbour cache
+  ST32 (&pRefIndex[iListIdx][iCacheIdx ], iRef4Bytes);
+  ST32 (&pRefIndex[iListIdx][iCacheIdx6], iRef4Bytes);
+}
+
+void UpdateP8x16RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
+                             const int8_t iListIdx) { // writes iRef for the partition at iPartIdx and the one 8 indices later, in both the layer array and the cache
+  const uint16_t iRef16Bit = (uint8_t) iRef; // keep only the low byte so a negative iRef is not sign-extended into the second byte
+  const int16_t iRef2Bytes = (iRef16Bit << 8) | iRef16Bit; // iRef replicated into both bytes
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  for (i = 0; i < 2; i++, iPartIdx += 8) { // two passes: iPartIdx, then iPartIdx + 8
+    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+    const uint8_t iScan4Idx4 = 4 + iScan4Idx;
+    const uint8_t iCacheIdx6 = 6 + iCacheIdx;
+    //mb: two 2-byte stores (ST16 is presumably a 16-bit store macro) into the per-MB ref-index array
+    ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx ], iRef2Bytes);
+    ST16 (&pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx4], iRef2Bytes);
+    //cache: the same two stores into the neighbour cache
+    ST16 (&pRefIndex[iListIdx][iCacheIdx ], iRef2Bytes);
+    ST16 (&pRefIndex[iListIdx][iCacheIdx6], iRef2Bytes);
+  }
+}
+
+void UpdateP8x8RefIdxCabac (PDqLayer pCurDqLayer, int8_t pRefIndex[LIST_A][30], int32_t iPartIdx, const int8_t iRef,
+                            const int8_t iListIdx) { // stores iRef in the layer ref-index array only; the pRefIndex cache argument is not written here
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+  pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 1] = // entries +0 and +1 ...
+        pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 4] = pCurDqLayer->pRefIndex[iListIdx][iMbXy][iScan4Idx + 5] = iRef; // ... and +4 and +5 all receive iRef
+}
+
+void UpdateP8x8DirectCabac (PDqLayer pCurDqLayer, int32_t iPartIdx) { // sets the pDirect flag to 1 for the four entries belonging to the partition at iPartIdx
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+  pCurDqLayer->pDirect[iMbXy][iScan4Idx] = pCurDqLayer->pDirect[iMbXy][iScan4Idx + 1] = // entries +0 and +1 ...
+        pCurDqLayer->pDirect[iMbXy][iScan4Idx + 4] = pCurDqLayer->pDirect[iMbXy][iScan4Idx + 5] = 1; // ... and +4 and +5 are all set to 1
+}
+
+void UpdateP16x16DirectCabac (PDqLayer pCurDqLayer) { // sets pDirect flags of the current MB to 1, two entries per store
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  const int16_t direct = (1 << 8) | 1; // 0x0101: the value 1 in each of the two bytes
+  for (i = 0; i < 16; i += 4) { // i = 0, 4, 8, 12
+    const uint8_t kuiScan4Idx = g_kuiScan4[i];
+    const uint8_t kuiScan4IdxPlus4 = 4 + kuiScan4Idx;
+    ST16 (&pCurDqLayer->pDirect[iMbXy][kuiScan4Idx], direct); // NOTE(review): ST16 is presumably a 16-bit store covering entries idx and idx+1 - confirm pDirect entries are one byte wide
+    ST16 (&pCurDqLayer->pDirect[iMbXy][kuiScan4IdxPlus4], direct); // same pair, 4 entries further on
+  }
+}
+
+void UpdateP16x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvd[2], const int8_t iListIdx) { // copies the pMvd pair over the current MB's pMvd entries for list iListIdx
+  int32_t pMvd32[2]; // holds two back-to-back copies of the pMvd pair
+  ST32 (&pMvd32[0], LD32 (pMvd)); // LD32/ST32 are presumably 32-bit load/store macros: both int16 components move at once
+  ST32 (&pMvd32[1], LD32 (pMvd));
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  for (i = 0; i < 16; i += 2) { // even indices 0..14
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][i], LD64 (pMvd32)); // assuming ST64 stores 8 bytes, this fills entries i and i+1
+  }
+}
+
+void UpdateP16x8MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2],
+                          const int8_t iListIdx) { // copies the pMvd pair into the layer pMvd array and into pMvdCache for the partition at iPartIdx
+  int32_t pMvd32[2]; // holds two back-to-back copies of the pMvd pair
+  ST32 (&pMvd32[0], LD32 (pMvd)); // LD32/ST32 are presumably 32-bit load/store macros
+  ST32 (&pMvd32[1], LD32 (pMvd));
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  for (i = 0; i < 2; i++, iPartIdx += 4) { // two passes: iPartIdx, then iPartIdx + 4
+    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+    const uint8_t iScan4Idx4 = 4 + iScan4Idx;
+    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+    const uint8_t iCacheIdx6 = 6 + iCacheIdx;
+    //mb: assuming ST64 stores 8 bytes, each store fills two adjacent entries
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx ], LD64 (pMvd32));
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx4], LD64 (pMvd32));
+    //cache: same pattern, with the second store 6 entries further on
+    ST64 (pMvdCache[iListIdx][  iCacheIdx ], LD64 (pMvd32));
+    ST64 (pMvdCache[iListIdx][  iCacheIdx6], LD64 (pMvd32));
+  }
+}
+
+void UpdateP8x16MvdCabac (SDqLayer* pCurDqLayer, int16_t pMvdCache[LIST_A][30][MV_A], int32_t iPartIdx, int16_t pMvd[2],
+                          const int8_t iListIdx) { // copies the pMvd pair into the layer pMvd array and into pMvdCache for iPartIdx and iPartIdx + 8
+  int32_t pMvd32[2]; // holds two back-to-back copies of the pMvd pair
+  ST32 (&pMvd32[0], LD32 (pMvd)); // LD32/ST32 are presumably 32-bit load/store macros
+  ST32 (&pMvd32[1], LD32 (pMvd));
+  int32_t i;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+
+  for (i = 0; i < 2; i++, iPartIdx += 8) { // two passes: iPartIdx, then iPartIdx + 8
+    const uint8_t iScan4Idx = g_kuiScan4[iPartIdx];
+    const uint8_t iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+    const uint8_t iScan4Idx4 = 4 + iScan4Idx;
+    const uint8_t iCacheIdx6 = 6 + iCacheIdx;
+    //mb: assuming ST64 stores 8 bytes, each store fills two adjacent entries
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx ], LD64 (pMvd32));
+    ST64 (pCurDqLayer->pMvd[iListIdx][iMbXy][  iScan4Idx4], LD64 (pMvd32));
+    //cache: same pattern, with the second store 6 entries further on
+    ST64 (pMvdCache[iListIdx][  iCacheIdx ], LD64 (pMvd32));
+    ST64 (pMvdCache[iListIdx][  iCacheIdx6], LD64 (pMvd32));
+  }
+}
+
+int32_t ParseEndOfSliceCabac (PWelsDecoderContext pCtx, uint32_t& uiBinVal) { // decodes one terminate bin into uiBinVal
+  uiBinVal = 0; // defined output even if the decode below bails out
+  WELS_READ_VERIFY (DecodeTerminateCabac (pCtx->pCabacDecEngine, uiBinVal)); // NOTE(review): WELS_READ_VERIFY presumably returns the error code on failure - macro not visible here
+  return ERR_NONE;
+}
+
+int32_t ParseSkipFlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSkip) { // decodes one skip-flag bin into uiSkip
+  uiSkip = 0;
+  int32_t iCtxInc = NEW_CTX_OFFSET_SKIP; // start from the skip-flag context base
+  iCtxInc += (pNeighAvail->iLeftAvail && !IS_SKIP (pNeighAvail->iLeftType)) + (pNeighAvail->iTopAvail
+             && !IS_SKIP (pNeighAvail->iTopType)); // +1 for each of the left/top neighbours that is available and fails the IS_SKIP test
+  if (B_SLICE == pCtx->eSliceType)
+    iCtxInc += 13; // B slices select a context 13 entries further on
+  PWelsCabacCtx pBinCtx = (pCtx->pCabacCtx + iCtxInc);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pBinCtx, uiSkip)); // NOTE(review): presumably returns the error code on failure - macro not visible here
+  return ERR_NONE;
+}
+
+
+int32_t ParseMBTypeISliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiBinVal) { // decodes the I-slice mb_type into uiBinVal: 0 = I4x4, 1..24 = I16x16 variants, 25 = I_PCM
+  uint32_t uiCode;
+  int32_t iIdxA = 0, iIdxB = 0;
+  int32_t iCtxInc;
+  uiBinVal = 0;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MB_TYPE_I; //I mode in I slice
+  iIdxA = (pNeighAvail->iLeftAvail) && (pNeighAvail->iLeftType != MB_TYPE_INTRA4x4
+                                        && pNeighAvail->iLeftType != MB_TYPE_INTRA8x8); // 1 when the left MB is available and is neither INTRA4x4 nor INTRA8x8
+  iIdxB = (pNeighAvail->iTopAvail) && (pNeighAvail->iTopType != MB_TYPE_INTRA4x4
+                                       && pNeighAvail->iTopType != MB_TYPE_INTRA8x8); // same test for the top MB
+  iCtxInc = iIdxA + iIdxB; // context 0, 1 or 2 for the first bin
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
+  uiBinVal = uiCode;
+  if (uiBinVal != 0) {  //I16x16 or I_PCM: the terminate bin below tells them apart
+    WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode));
+    if (uiCode == 1)
+      uiBinVal = 25; //I_PCM
+    else {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
+      uiBinVal = 1 + uiCode * 12; // base 1, plus 12 when this bin is set
+      //decoding of uiCbp:0,1,2 (adds 0, 4 or 8)
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode));
+      if (uiCode != 0) {
+        WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+        uiBinVal += 4;
+        if (uiCode != 0)
+          uiBinVal += 4;
+      }
+      //decoding of I pred-mode: 0,1,2,3 (two bins, high bit first)
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
+      uiBinVal += (uiCode << 1);
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode));
+      uiBinVal += uiCode;
+    }
+  }
+  //I4x4: uiBinVal is still 0 when the first bin was 0
+  return ERR_NONE;
+}
+
+int32_t ParseMBTypePSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiMbType) { // Decodes the P-slice mb_type code into uiMbType: 0..3 for inter types, 5 for intra 4x4, 6..29 for intra 16x16, 30 for PCM. pNeighAvail is not used here.
+  uint32_t uiCode;
+  uiMbType = 0;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SKIP; // all contexts below are addressed relative to the skip-flag offset
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); // first bin: non-zero selects the intra branch
+  if (uiCode) {
+    // Intra MB
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode));
+    if (uiCode) { // Intra 16x16
+      WELS_READ_VERIFY (DecodeTerminateCabac (pCabacDecEngine, uiCode)); // terminate bin separates PCM from intra 16x16
+      if (uiCode) {
+        uiMbType = 30;
+        return ERR_NONE;//MB_TYPE_INTRA_PCM;
+      }
+
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 7, uiCode));
+      uiMbType = 6 + uiCode * 12; // base code is 6 or 18 depending on this bin
+
+      //uiCbp: 0,1,2
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode));
+      if (uiCode) {
+        uiMbType += 4;
+        WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 8, uiCode)); // same context (+8) is reused for the second bin
+        if (uiCode)
+          uiMbType += 4;
+      }
+
+      //IPredMode: 0,1,2,3
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode));
+      uiMbType += (uiCode << 1); // high bit of the prediction mode
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 9, uiCode)); // same context (+9) is reused for the low bit
+      uiMbType += uiCode;
+    } else
+      // Intra 4x4
+      uiMbType = 5;
+  } else { // P MB
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode));
+    if (uiCode) { //second bit
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 6, uiCode)); // third bin chooses between codes 1 and 2
+      if (uiCode)
+        uiMbType = 1;
+      else
+        uiMbType = 2;
+    } else {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode)); // third bin chooses between codes 3 and 0
+      if (uiCode)
+        uiMbType = 3;
+      else
+        uiMbType = 0;
+    }
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseMBTypeBSliceCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiMbType) { // Decodes the B-slice mb_type code into uiMbType (0 = direct, 1..22 inter codes, 23 + intra code for intra MBs); returns ERR_NONE on success.
+  uint32_t uiCode;
+  uiMbType = 0;
+  int32_t iIdxA = 0, iIdxB = 0;
+  int32_t iCtxInc;
+
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + 27; //B slice
+
+  iIdxA = (pNeighAvail->iLeftAvail) && !IS_DIRECT (pNeighAvail->iLeftType); // 1 when the left MB is available and not direct
+  iIdxB = (pNeighAvail->iTopAvail) && !IS_DIRECT (pNeighAvail->iTopType); // 1 when the top MB is available and not direct
+
+  iCtxInc = iIdxA + iIdxB; // context increment 0..2 for the first bin
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
+  if (!uiCode)
+    uiMbType = 0; // Bi_Direct
+  else {
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
+    if (!uiCode) {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+      uiMbType = 1 + uiCode; // 16x16 L0L1
+    } else {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 4, uiCode)); // collect a 4-bit value, most significant bit first
+      uiMbType = uiCode << 3;
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+      uiMbType |= uiCode << 2;
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+      uiMbType |= uiCode << 1;
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+      uiMbType |= uiCode;
+      if (uiMbType < 8) {
+        uiMbType += 3; // 4-bit values 0..7 map to codes 3..10
+        return ERR_NONE;
+      } else if (uiMbType == 13) {
+        uiMbType = DecodeCabacIntraMbType (pCtx, pNeighAvail, 32) + 23; // NOTE(review): helper result is used as a value; any error signalling it may have is not checked here -- confirm
+        return ERR_NONE;
+      } else if (uiMbType == 14) {
+        uiMbType = 11; // Bi8x16
+        return ERR_NONE;
+      } else if (uiMbType == 15) {
+        uiMbType = 22; // 8x8
+        return ERR_NONE;
+      }
+      uiMbType <<= 1; // remaining 4-bit values 8..12 take one more bin
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 5, uiCode));
+      uiMbType |= uiCode;
+      uiMbType -= 4; // 5-bit values 16..25 map to codes 12..21
+    }
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseTransformSize8x8FlagCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, // Decodes one bin into bTransformSize8x8Flag, with the context chosen from the left/top MBs' stored flags.
+                                        bool& bTransformSize8x8Flag) {
+  uint32_t uiCode;
+  int32_t iIdxA, iIdxB;
+  int32_t iCtxInc;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_TS_8x8_FLAG;
+  iIdxA = (pNeighAvail->iLeftAvail) && (pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - 1]); // left MB (index - 1) available and has its flag set
+  iIdxB = (pNeighAvail->iTopAvail) // top MB (index - iMbWidth) available and has its flag set
+          && (pCtx->pCurDqLayer->pTransformSize8x8Flag[pCtx->pCurDqLayer->iMbXyIndex - pCtx->pCurDqLayer->iMbWidth]);
+  iCtxInc = iIdxA + iIdxB; // context increment 0..2
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + iCtxInc, uiCode));
+  bTransformSize8x8Flag = !!uiCode; // normalise the bin value to bool
+
+  return ERR_NONE;
+}
+
+int32_t ParseSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) { // Decodes a sub_mb_type code in 0..3 into uiSubMbType using up to three bins; pNeighAvail is not used here.
+  uint32_t uiCode;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_SUBMB_TYPE;
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode));
+  if (uiCode)
+    uiSubMbType = 0; // first bin set: code 0
+  else {
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode));
+    if (uiCode) {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode));
+      uiSubMbType = 3 - uiCode; // third bin 0 gives code 3, third bin 1 gives code 2
+    } else {
+      uiSubMbType = 1; // second bin clear: code 1
+    }
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseBSubMBTypeCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiSubMbType) { // Decodes a B-slice sub_mb_type code in 0..12 into uiSubMbType; pNeighAvail is not used here.
+  uint32_t uiCode;
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  PWelsCabacCtx pBinCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_B_SUBMB_TYPE;
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx, uiCode));
+  if (!uiCode) {
+    uiSubMbType = 0; /* B_Direct_8x8 */
+    return ERR_NONE;
+  }
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 1, uiCode));
+  if (!uiCode) {
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
+    uiSubMbType = 1 + uiCode; /* B_L0_8x8, B_L1_8x8 */
+    return ERR_NONE;
+  }
+  uiSubMbType = 3; // base for the remaining codes; refined by the bins below
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 2, uiCode));
+  if (uiCode) {
+    WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
+    if (uiCode) {
+      WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
+      uiSubMbType = 11 + uiCode; /* B_L1_4x4, B_Bi_4x4 */
+      return ERR_NONE;
+    }
+    uiSubMbType += 4; // base becomes 7, so the two bins below yield 7..10
+  }
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode)); // two trailing bins add a 2-bit offset (0..3) to the base
+  uiSubMbType += 2 * uiCode;
+  WELS_READ_VERIFY (DecodeBinCabac (pCabacDecEngine, pBinCtx + 3, uiCode));
+  uiSubMbType += uiCode;
+
+  return ERR_NONE;
+}
+
+int32_t ParseIntraPredModeLumaCabac (PWelsDecoderContext pCtx, int32_t& iBinVal) { // Decodes the luma intra prediction mode info into iBinVal: -1 when the first bin is 1, otherwise a 3-bit value 0..7.
+  uint32_t uiCode;
+  iBinVal = 0;
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR, uiCode));
+  if (uiCode == 1)
+    iBinVal = -1; // presumably tells the caller to use the predicted mode -- confirm against caller
+  else {
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode)); // three bins share one context and are assembled least significant bit first
+    iBinVal |= uiCode;
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode));
+    iBinVal |= (uiCode << 1);
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_IPR + 1, uiCode));
+    iBinVal |= (uiCode << 2);
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseIntraPredModeChromaCabac (PWelsDecoderContext pCtx, uint8_t uiNeighAvail, int32_t& iBinVal) { // Decodes the chroma intra prediction mode (0..3) into iBinVal; uiNeighAvail is a bit mask of available neighbours.
+  uint32_t uiCode;
+  int32_t iIdxA, iIdxB, iCtxInc;
+  int8_t* pChromaPredMode = pCtx->pCurDqLayer->pChromaPredMode;
+  uint32_t* pMbType = pCtx->pCurDqLayer->pMbType;
+  int32_t iLeftAvail     = uiNeighAvail & 0x04; // bit 0x04 of the mask: left neighbour
+  int32_t iTopAvail      = uiNeighAvail & 0x01; // bit 0x01 of the mask: top neighbour
+
+  int32_t iMbXy = pCtx->pCurDqLayer->iMbXyIndex;
+  int32_t iMbXyTop = iMbXy - pCtx->pCurDqLayer->iMbWidth; // MB index one row up
+  int32_t iMbXyLeft = iMbXy - 1;
+
+  iBinVal = 0;
+
+  iIdxB = iTopAvail  && (pChromaPredMode[iMbXyTop] > 0 && pChromaPredMode[iMbXyTop] <= 3) // top available, its stored chroma mode is 1..3, and it is not PCM
+          && pMbType[iMbXyTop]  != MB_TYPE_INTRA_PCM;
+  iIdxA = iLeftAvail && (pChromaPredMode[iMbXyLeft] > 0 && pChromaPredMode[iMbXyLeft] <= 3) // same test for the left MB
+          && pMbType[iMbXyLeft] != MB_TYPE_INTRA_PCM;
+  iCtxInc = iIdxA + iIdxB; // context increment 0..2 for the first bin
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + iCtxInc, uiCode));
+  iBinVal = uiCode;
+  if (iBinVal != 0) { // first bin 0 means mode 0; otherwise up to two more bins follow
+    uint32_t iSym;
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, iSym));
+    if (iSym == 0) {
+      iBinVal = (iSym + 1); // second bin 0: mode 1
+      return ERR_NONE;
+    }
+    iSym = 0;
+    do { // the iSym < 1 bound makes this body run exactly once, reading the third bin
+      WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CIPR + 3, uiCode));
+      ++iSym;
+    } while ((uiCode != 0) && (iSym < 1));
+
+    if ((uiCode != 0) && (iSym == 1))
+      ++ iSym; // third bin set: iSym becomes 2
+    iBinVal = (iSym + 1); // mode 2 when the third bin is 0, mode 3 when it is 1
+    return ERR_NONE;
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseInterPMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount, // Parses LIST_0 reference indices and motion vector differences for the current P macroblock and stores the resulting motion data.
+                                    int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30]) { // the three cache arrays are read for prediction and updated as partitions are parsed
+  PSlice pSlice                 = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader     = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  PPicture* ppRefPic = pCtx->sRefPic.pRefList[LIST_0]; // only LIST_0 references are used in this function
+  int32_t pRefCount[2];
+  int32_t i, j;
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int16_t pMv[4] = {0}; // holds one (x, y) vector in [0..1]; [2..3] is used as a duplicate for 64-bit stores
+  int16_t pMvd[4] = {0}; // same layout for the motion vector difference
+  int8_t iRef[2] = {0};
+  int32_t iPartIdx;
+  int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv; // vertical mv limits taken from the SPS level limits
+  int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv;
+  pRefCount[0] = pSliceHeader->uiRefCount[0];
+  pRefCount[1] = pSliceHeader->uiRefCount[1]; // stored but only pRefCount[0] is read below
+
+  switch (pCurDqLayer->pMbType[iMbXy]) { // dispatch on the MB type already stored for this MB
+  case MB_TYPE_16x16: {
+    iPartIdx = 0;
+    WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0,
+                                        iRef[0]));
+    if ((iRef[0] < 0) || (iRef[0] >= pRefCount[0]) || (ppRefPic[iRef[0]] == NULL)) { //error ref_idx
+      pCtx->bMbRefConcealed = true;
+      if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { // concealment enabled: fall back to reference 0 and record a bitstream error
+        iRef[0] = 0;
+        pCtx->iErrorCode |= dsBitstreamError;
+      } else { // concealment disabled: abort parsing of this MB
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+      }
+    }
+    pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[0]] // also flag the MB when the chosen reference is missing or not complete
+                            && ppRefPic[iRef[0]]->bIsComplete);
+    PredMv (pMotionVector, pRefIndex, LIST_0, 0, 4, iRef[0], pMv); // presumably writes the predicted mv into pMv -- helper not visible here
+    WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0])); // component 0
+    WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1])); // component 1
+    pMv[0] += pMvd[0]; // final mv = value left in pMv by PredMv + decoded difference
+    pMv[1] += pMvd[1];
+    WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+    UpdateP16x16MotionInfo (pCurDqLayer, LIST_0, iRef[0], pMv);
+    UpdateP16x16MvdCabac (pCurDqLayer, pMvd, LIST_0);
+  }
+  break;
+  case MB_TYPE_16x8:
+    for (i = 0; i < 2; i++) { // first pass: reference index of both partitions
+      iPartIdx = i << 3; // partition start index 0 or 8
+      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0,
+                                          iRef[i]));
+      if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx
+        pCtx->bMbRefConcealed = true;
+        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) { // same concealment policy as the 16x16 case
+          iRef[i] = 0;
+          pCtx->iErrorCode |= dsBitstreamError;
+        } else {
+          return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+        }
+      }
+      pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
+                              && ppRefPic[iRef[i]]->bIsComplete);
+      UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
+    }
+    for (i = 0; i < 2; i++) { // second pass: motion vector of both partitions
+      iPartIdx = i << 3;
+      PredInter16x8Mv (pMotionVector, pRefIndex, LIST_0, iPartIdx, iRef[i], pMv);
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
+      pMv[0] += pMvd[0];
+      pMv[1] += pMvd[1];
+      WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+      UpdateP16x8MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, LIST_0, iPartIdx, iRef[i], pMv);
+      UpdateP16x8MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0);
+    }
+    break;
+  case MB_TYPE_8x16:
+    for (i = 0; i < 2; i++) { // first pass: reference index of both partitions
+      iPartIdx = i << 2; // partition start index 0 or 4
+      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iPartIdx, pRefCount[0], 0,
+                                          iRef[i]));
+      if ((iRef[i] < 0) || (iRef[i] >= pRefCount[0]) || (ppRefPic[iRef[i]] == NULL)) { //error ref_idx
+        pCtx->bMbRefConcealed = true;
+        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+          iRef[i] = 0;
+          pCtx->iErrorCode |= dsBitstreamError;
+        } else {
+          return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+        }
+      }
+      pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[iRef[i]]
+                              && ppRefPic[iRef[i]]->bIsComplete);
+      UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, iRef[i], LIST_0);
+    }
+    for (i = 0; i < 2; i++) { // second pass: motion vector of both partitions
+      iPartIdx = i << 2;
+      PredInter8x16Mv (pMotionVector, pRefIndex, LIST_0, i << 2, iRef[i], pMv/*&mv[0], &mv[1]*/);
+
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
+      WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
+      pMv[0] += pMvd[0];
+      pMv[1] += pMvd[1];
+      WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+      UpdateP8x16MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, LIST_0, iPartIdx, iRef[i], pMv);
+      UpdateP8x16MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, LIST_0);
+    }
+    break;
+  case MB_TYPE_8x8:
+  case MB_TYPE_8x8_REF0: {
+    int8_t pRefIdx[4] = {0}, pSubPartCount[4], pPartW[4]; // per 8x8 block: reference index, sub-partition count, sub-partition width
+    uint32_t uiSubMbType;
+    //sub_mb_type, partition
+    for (i = 0; i < 4; i++) {
+      WELS_READ_VERIFY (ParseSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType));
+      if (uiSubMbType >= 4) { //invalid sub_mb_type
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE);
+      }
+      pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iType; // translate the parsed code through the P sub-MB type table
+      pSubPartCount[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartCount;
+      pPartW[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartWidth;
+
+      // Need modification when B picture add in, reference to 7.3.5
+      pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0); // flag survives only if every parsed code is 0
+    }
+
+    for (i = 0; i < 4; i++) { // reference index for each 8x8 block
+      int16_t iIdx8 = i << 2; // start index of the 8x8 block: 0, 4, 8, 12
+      WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, 0, LIST_0, iIdx8, pRefCount[0], 1,
+                                          pRefIdx[i]));
+      if ((pRefIdx[i] < 0) || (pRefIdx[i] >= pRefCount[0]) || (ppRefPic[pRefIdx[i]] == NULL)) { //error ref_idx
+        pCtx->bMbRefConcealed = true;
+        if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+          pRefIdx[i] = 0;
+          pCtx->iErrorCode |= dsBitstreamError;
+        } else {
+          return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+        }
+      }
+      pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (ppRefPic[pRefIdx[i]]
+                              && ppRefPic[pRefIdx[i]]->bIsComplete);
+      UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, pRefIdx[i], LIST_0);
+    }
+    //mv
+    for (i = 0; i < 4; i++) {
+      int8_t iPartCount = pSubPartCount[i];
+      uiSubMbType = pCurDqLayer->pSubMbType[iMbXy][i]; // now holds the table iType, not the raw parsed code
+      int16_t iPartIdx, iBlockW = pPartW[i]; // this iPartIdx shadows the function-level one
+      uint8_t iScan4Idx, iCacheIdx;
+      iCacheIdx = g_kuiCache30ScanIdx[i << 2];
+      pRefIndex[0][iCacheIdx ] = pRefIndex[0][iCacheIdx + 1] // write the block's reference index into its four cache cells (+0, +1, +6, +7)
+                                 = pRefIndex[0][iCacheIdx + 6] = pRefIndex[0][iCacheIdx + 7] = pRefIdx[i];
+
+      for (j = 0; j < iPartCount; j++) { // one mv per sub-partition
+        iPartIdx = (i << 2) + j * iBlockW;
+        iScan4Idx = g_kuiScan4[iPartIdx]; // index into the per-MB mv/mvd arrays
+        iCacheIdx = g_kuiCache30ScanIdx[iPartIdx]; // index into the 30-entry neighbour caches
+        PredMv (pMotionVector, pRefIndex, LIST_0, iPartIdx, iBlockW, pRefIdx[i], pMv);
+        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 0, pMvd[0]));
+        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, LIST_0, 1, pMvd[1]));
+        pMv[0] += pMvd[0];
+        pMv[1] += pMvd[1];
+        WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+        if (SUB_MB_TYPE_8x8 == uiSubMbType) { // fill a 2x2 group of cells: two entries wide on two rows
+          ST32 ((pMv + 2), LD32 (pMv)); // duplicate the (x, y) pair so a 64-bit store covers two adjacent entries (ST/LD macros presumably fixed-width store/load -- confirm)
+          ST32 ((pMvd + 2), LD32 (pMvd));
+          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx], LD64 (pMv));
+          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD64 (pMv)); // +4 addresses the next row in the per-MB array
+          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx], LD64 (pMvd));
+          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD64 (pMvd));
+          ST64 (pMotionVector[0][iCacheIdx  ], LD64 (pMv));
+          ST64 (pMotionVector[0][iCacheIdx + 6], LD64 (pMv)); // +6 addresses the next row in the cache
+          ST64 (pMvdCache[0][iCacheIdx  ], LD64 (pMvd));
+          ST64 (pMvdCache[0][iCacheIdx + 6], LD64 (pMvd));
+        } else if (SUB_MB_TYPE_8x4 == uiSubMbType) { // two entries wide on a single row
+          ST32 ((pMv + 2), LD32 (pMv));
+          ST32 ((pMvd + 2), LD32 (pMvd));
+          ST64 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD64 (pMv));
+          ST64 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD64 (pMvd));
+          ST64 (pMotionVector[0][iCacheIdx  ], LD64 (pMv));
+          ST64 (pMvdCache[0][iCacheIdx  ], LD64 (pMvd));
+        } else if (SUB_MB_TYPE_4x8 == uiSubMbType) { // one entry wide on two rows
+          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD32 (pMv));
+          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx + 4], LD32 (pMv));
+          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD32 (pMvd));
+          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx + 4], LD32 (pMvd));
+          ST32 (pMotionVector[0][iCacheIdx  ], LD32 (pMv));
+          ST32 (pMotionVector[0][iCacheIdx + 6], LD32 (pMv));
+          ST32 (pMvdCache[0][iCacheIdx  ], LD32 (pMvd));
+          ST32 (pMvdCache[0][iCacheIdx + 6], LD32 (pMvd));
+        } else {  //SUB_MB_TYPE_4x4
+          ST32 (pCurDqLayer->pMv[0][iMbXy][iScan4Idx  ], LD32 (pMv)); // single entry
+          ST32 (pCurDqLayer->pMvd[0][iMbXy][iScan4Idx  ], LD32 (pMvd));
+          ST32 (pMotionVector[0][iCacheIdx  ], LD32 (pMv));
+          ST32 (pMvdCache[0][iCacheIdx  ], LD32 (pMvd));
+        }
+      }
+    }
+  }
+  break;
+  default: // other MB types carry no motion info to parse here
+    break;
+  }
+  return ERR_NONE;
+}
+
+int32_t ParseInterBMotionInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
+                                    int16_t pMotionVector[LIST_A][30][MV_A], int16_t pMvdCache[LIST_A][30][MV_A], int8_t pRefIndex[LIST_A][30],
+                                    int8_t pDirect[30]) {
+  PSlice pSlice = &pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  int32_t pRefCount[LIST_A];
+  int32_t iMbXy = pCurDqLayer->iMbXyIndex;
+  int16_t pMv[4] = { 0 };
+  int16_t pMvd[4] = { 0 };
+  int8_t iRef[LIST_A] = { 0 };
+  int32_t iPartIdx;
+  int16_t iMinVmv = pSliceHeader->pSps->pSLevelLimits->iMinVmv;
+  int16_t iMaxVmv = pSliceHeader->pSps->pSLevelLimits->iMaxVmv;
+  pRefCount[0] = pSliceHeader->uiRefCount[0];
+  pRefCount[1] = pSliceHeader->uiRefCount[1];
+
+  MbType mbType = pCurDqLayer->pMbType[iMbXy];
+
+  if (IS_DIRECT (mbType)) {
+
+    int16_t pMvDirect[LIST_A][2] = { { 0, 0 }, { 0, 0 } };
+    if (pSliceHeader->iDirectSpatialMvPredFlag) {
+      //predict direct spatial mv
+      PredMvBDirectSpatial (pCtx, pMvDirect, iRef);
+    } else {
+      //temporal direct 16x16 mode
+      ComputeColocated (pCtx);
+      PredBDirectTemporal (pCtx, pMvDirect, iRef);
+    }
+  } else if (IS_INTER_16x16 (mbType)) {
+    iPartIdx = 0;
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      iRef[listIdx] = REF_NOT_IN_LIST;
+      if (IS_DIR (mbType, 0, listIdx)) {
+        WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iPartIdx,
+                                            pRefCount[listIdx], 0,
+                                            iRef[listIdx]));
+        if ((iRef[listIdx] < 0) || (iRef[listIdx] >= pRefCount[listIdx])
+            || (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]] == NULL)) { //error ref_idx
+          pCtx->bMbRefConcealed = true;
+          if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+            iRef[listIdx] = 0;
+            pCtx->iErrorCode |= dsBitstreamError;
+          } else {
+            return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+          }
+        }
+        pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]
+                                && pCtx->sRefPic.pRefList[listIdx][iRef[listIdx]]->bIsComplete);
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      if (IS_DIR (mbType, 0, listIdx)) {
+        PredMv (pMotionVector, pRefIndex, listIdx, 0, 4, iRef[listIdx], pMv);
+        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0]));
+        WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1]));
+        pMv[0] += pMvd[0];
+        pMv[1] += pMvd[1];
+        WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+      } else {
+        * (uint32_t*)pMv = * (uint32_t*)pMvd = 0;
+      }
+      UpdateP16x16MotionInfo (pCurDqLayer, listIdx, iRef[listIdx], pMv);
+      UpdateP16x16MvdCabac (pCurDqLayer, pMvd, listIdx);
+    }
+  } else if (IS_INTER_16x8 (mbType)) {
+    int8_t ref_idx_list[LIST_A][2] = { {REF_NOT_IN_LIST, REF_NOT_IN_LIST}, { REF_NOT_IN_LIST, REF_NOT_IN_LIST } };
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; ++i) {
+        iPartIdx = i << 3;
+        int8_t ref_idx = REF_NOT_IN_LIST;
+        if (IS_DIR (mbType, i, listIdx)) {
+          WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iPartIdx,
+                                              pRefCount[listIdx], 0, ref_idx));
+          if ((ref_idx < 0) || (ref_idx >= pRefCount[listIdx])
+              || (pCtx->sRefPic.pRefList[listIdx][ref_idx] == NULL)) { //error ref_idx
+            pCtx->bMbRefConcealed = true;
+            if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+              ref_idx = 0;
+              pCtx->iErrorCode |= dsBitstreamError;
+            } else {
+              return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+            }
+          }
+          pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx]
+                                  && pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete);
+        }
+        UpdateP16x8RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx);
+        ref_idx_list[listIdx][i] = ref_idx;
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; ++i) {
+        iPartIdx = i << 3;
+        int8_t ref_idx = ref_idx_list[listIdx][i];
+        if (IS_DIR (mbType, i, listIdx)) {
+          PredInter16x8Mv (pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv);
+          WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0]));
+          WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1]));
+          pMv[0] += pMvd[0];
+          pMv[1] += pMvd[1];
+          WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+        } else {
+          * (uint32_t*)pMv = * (uint32_t*)pMvd = 0;
+        }
+        UpdateP16x8MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv);
+        UpdateP16x8MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, listIdx);
+      }
+    }
+  } else if (IS_INTER_8x16 (mbType)) {
+    int8_t ref_idx_list[LIST_A][2] = { { REF_NOT_IN_LIST, REF_NOT_IN_LIST }, { REF_NOT_IN_LIST, REF_NOT_IN_LIST } };
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; ++i) {
+        iPartIdx = i << 2;
+        int8_t ref_idx = REF_NOT_IN_LIST;
+        if (IS_DIR (mbType, i, listIdx)) {
+          WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iPartIdx,
+                                              pRefCount[listIdx], 0, ref_idx));
+          if ((ref_idx < 0) || (ref_idx >= pRefCount[listIdx])
+              || (pCtx->sRefPic.pRefList[listIdx][ref_idx] == NULL)) { //error ref_idx
+            pCtx->bMbRefConcealed = true;
+            if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+              ref_idx = 0;
+              pCtx->iErrorCode |= dsBitstreamError;
+            } else {
+              return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+            }
+          }
+          pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][ref_idx]
+                                  && pCtx->sRefPic.pRefList[listIdx][ref_idx]->bIsComplete);
+        }
+        UpdateP8x16RefIdxCabac (pCurDqLayer, pRefIndex, iPartIdx, ref_idx, listIdx);
+        ref_idx_list[listIdx][i] = ref_idx;
+      }
+    }
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 2; ++i) {
+        iPartIdx = i << 2;
+        int8_t ref_idx = ref_idx_list[listIdx][i];
+        if (IS_DIR (mbType, i, listIdx)) {
+          PredInter8x16Mv (pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv);
+          WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0]));
+          WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1]));
+          pMv[0] += pMvd[0];
+          pMv[1] += pMvd[1];
+          WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+        } else {
+          * (uint32_t*)pMv = * (uint32_t*)pMvd = 0;
+        }
+        UpdateP8x16MotionInfo (pCurDqLayer, pMotionVector, pRefIndex, listIdx, iPartIdx, ref_idx, pMv);
+        UpdateP8x16MvdCabac (pCurDqLayer, pMvdCache, iPartIdx, pMvd, listIdx);
+      }
+    }
+  } else if (IS_Inter_8x8 (mbType)) {
+    int8_t pSubPartCount[4], pPartW[4];
+    uint32_t uiSubMbType;
+    //sub_mb_type, partition
+    int16_t pMvDirect[LIST_A][2] = { {0, 0}, {0, 0} };
+    bool has_direct_called = false;
+    SubMbType directSubMbType = 0;
+    for (int32_t i = 0; i < 4; i++) {
+      WELS_READ_VERIFY (ParseBSubMBTypeCabac (pCtx, pNeighAvail, uiSubMbType));
+      if (uiSubMbType > 13) { //invalid sub_mb_type
+        return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE);
+      }
+//      pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType;
+      pSubPartCount[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartCount;
+      pPartW[i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iPartWidth;
+
+      // Need modification when B picture add in, reference to 7.3.5
+      if (pSubPartCount[i] > 1)
+        pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] = false;
+
+      if (IS_DIRECT (g_ksInterBSubMbTypeInfo[uiSubMbType].iType)) {
+        if (!has_direct_called) {
+          if (pSliceHeader->iDirectSpatialMvPredFlag) {
+            directSubMbType = PredMvBDirectSpatial (pCtx, pMvDirect, iRef);
+          } else {
+            //temporal direct mode
+            ComputeColocated (pCtx);
+            PredBDirectTemporal (pCtx, pMvDirect, iRef);
+          }
+          has_direct_called = true;
+        }
+        pCurDqLayer->pSubMbType[iMbXy][i] = directSubMbType;
+        if (IS_SUB_4x4 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+          pSubPartCount[i] = 4;
+          pPartW[i] = 1;
+        }
+      } else {
+        pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterBSubMbTypeInfo[uiSubMbType].iType;
+      }
+    }
+    for (int32_t i = 0; i < 4; i++) { //Direct 8x8 Ref and mv
+
+      int16_t iIdx8 = i << 2;
+      if (IS_DIRECT (pCurDqLayer->pSubMbType[iMbXy][i])) {
+
+        int8_t iPartCount = pSubPartCount[i];
+        int16_t iPartIdx, iBlockW = pPartW[i];
+        uint8_t iScan4Idx, iCacheIdx, iColocIdx;
+        iCacheIdx = g_kuiCache30ScanIdx[iIdx8];
+
+        if (!pSliceHeader->iDirectSpatialMvPredFlag) {
+          iRef[LIST_1] = 0;
+          if (pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+            iRef[LIST_0] = 0;
+          } else {
+            if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
+              iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8];
+            } else {
+              iRef[LIST_0] = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8];
+            }
+          }
+        }
+        for (int32_t j = 0; j < iPartCount; j++) {
+          iPartIdx = iIdx8 + j * iBlockW;
+          iColocIdx = g_kuiScan4[iPartIdx];
+          iScan4Idx = g_kuiScan4[iPartIdx];
+          iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+
+          if (pSliceHeader->iDirectSpatialMvPredFlag) {
+            int16_t pMV[4] = { 0 };
+            if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
+              ST32 ((pMV + 2), LD32 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+              ST64 (pMotionVector[LIST_0][iCacheIdx], LD64 (pMV));
+              ST64 (pMotionVector[LIST_0][iCacheIdx + 6], LD64 (pMV));
+              ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
+              ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
+              ST32 ((pMV + 2), LD32 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+              ST64 (pMotionVector[LIST_1][iCacheIdx], LD64 (pMV));
+              ST64 (pMotionVector[LIST_1][iCacheIdx + 6], LD64 (pMV));
+              ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
+              ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
+            } else { //SUB_4x4
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_0];
+              ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (pMV));
+              ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              ST32 (pMotionVector[LIST_0][iCacheIdx], LD32 (pMV));
+              ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
+              * (uint32_t*)pMV = * (uint32_t*)pMvDirect[LIST_1];
+              ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (pMV));
+              ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+              ST32 (pMotionVector[LIST_1][iCacheIdx], LD32 (pMV));
+              ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
+            }
+
+            if ((* (int32_t*)pMvDirect[LIST_0] | * (int32_t*)pMvDirect[LIST_1])) {
+              bool bIsLongRef = pCtx->sRefPic.pRefList[LIST_1][0]->bIsLongRef;
+              uint32_t uiColZeroFlag = (0 == pCurDqLayer->iColocIntra[iColocIdx]) && !bIsLongRef &&
+                                       (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 || (pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] < 0
+                                           && pCurDqLayer->iColocRefIndex[LIST_1][iColocIdx] == 0));
+              const int16_t (*mvColoc)[2] = pCurDqLayer->iColocRefIndex[LIST_0][iColocIdx] == 0 ? pCurDqLayer->iColocMv[LIST_0] :
+                                            pCurDqLayer->iColocMv[LIST_1];
+              const int16_t* mv = mvColoc[iColocIdx];
+              if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+                if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+                  if (iRef[LIST_0] == 0) {
+                    ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (pMotionVector[LIST_0][iCacheIdx], 0);
+                    ST64 (pMotionVector[LIST_0][iCacheIdx + 6], 0);
+                    ST64 (pMvdCache[LIST_0][iCacheIdx], 0);
+                    ST64 (pMvdCache[LIST_0][iCacheIdx + 6], 0);
+                  }
+
+                  if (iRef[LIST_1] == 0) {
+                    ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+                    ST64 (pMotionVector[LIST_1][iCacheIdx], 0);
+                    ST64 (pMotionVector[LIST_1][iCacheIdx + 6], 0);
+                    ST64 (pMvdCache[LIST_1][iCacheIdx], 0);
+                    ST64 (pMvdCache[LIST_1][iCacheIdx + 6], 0);
+                  }
+                }
+              } else {
+                if (uiColZeroFlag && ((unsigned) (mv[0] + 1) <= 2 && (unsigned) (mv[1] + 1) <= 2)) {
+                  if (iRef[LIST_0] == 0) {
+                    ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+                    ST32 (pMotionVector[LIST_0][iCacheIdx], 0);
+                    ST32 (pMvdCache[LIST_0][iCacheIdx], 0);
+                  }
+                  if (iRef[LIST_1] == 0) {
+                    ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+                    ST32 (pMotionVector[LIST_1][iCacheIdx], 0);
+                    ST32 (pMvdCache[LIST_1][iCacheIdx], 0);
+                  }
+                }
+              }
+            }
+          } else {
+            int16_t (*mvColoc)[2] = pCurDqLayer->iColocMv[LIST_0];
+            int16_t* mv = mvColoc[iColocIdx];
+            int16_t pMV[4] = { 0 };
+            int16_t iMvp[LIST_A][2];
+            if (IS_SUB_8x8 (pCurDqLayer->pSubMbType[iMbXy][i])) {
+              iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
+              iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
+              ST32 (pMV, LD32 (iMvp[LIST_0]));
+              ST32 ((pMV + 2), LD32 (iMvp[LIST_0]));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx + 4], 0);
+              iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0];
+              iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1];
+              ST32 (pMV, LD32 (iMvp[LIST_1]));
+              ST32 ((pMV + 2), LD32 (iMvp[LIST_1]));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD64 (pMV));
+              ST64 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx + 4], LD64 (pMV));
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+              ST64 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx + 4], 0);
+            } else { //SUB_4x4
+              iMvp[LIST_0][0] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[0] + 128) >> 8;
+              iMvp[LIST_0][1] = (pSlice->iMvScale[LIST_0][iRef[LIST_0]] * mv[1] + 128) >> 8;
+              ST32 (pCurDqLayer->pMv[LIST_0][iMbXy][iScan4Idx], LD32 (iMvp[LIST_0]));
+              ST32 (pCurDqLayer->pMvd[LIST_0][iMbXy][iScan4Idx], 0);
+              iMvp[LIST_1][0] -= iMvp[LIST_0][0] - mv[0];
+              iMvp[LIST_1][1] -= iMvp[LIST_0][0] - mv[1];
+              ST32 (pCurDqLayer->pMv[LIST_1][iMbXy][iScan4Idx], LD32 (iMvp[LIST_1]));
+              ST32 (pCurDqLayer->pMvd[LIST_1][iMbXy][iScan4Idx], 0);
+            }
+          }
+        }
+      }
+    }
+    //ref no-direct
+    int8_t ref_idx_list[LIST_A][4] = { {REF_NOT_IN_LIST, REF_NOT_IN_LIST}, { REF_NOT_IN_LIST, REF_NOT_IN_LIST } };
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 4; i++) {
+        int16_t iIdx8 = i << 2;
+        int32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
+        int8_t iref = REF_NOT_IN_LIST;
+        if (IS_DIRECT (subMbType)) {
+          if (pSliceHeader->iDirectSpatialMvPredFlag) {
+            iref = iRef[listIdx];
+          } else {
+            iref = 0;
+            if (listIdx == LIST_0) {
+              if (!pCurDqLayer->iColocIntra[g_kuiScan4[iIdx8]]) {
+                if (pCurDqLayer->iColocRefIndex[LIST_0][iIdx8] >= 0) {
+                  iref = pCurDqLayer->iColocRefIndex[LIST_0][iIdx8];
+                } else {
+                  iref = pCurDqLayer->iColocRefIndex[LIST_1][iIdx8];
+                }
+              }
+            }
+          }
+          UpdateP8x8DirectCabac (pCurDqLayer, iIdx8);
+        } else {
+          if (IS_DIR (subMbType, 0, listIdx)) {
+            WELS_READ_VERIFY (ParseRefIdxCabac (pCtx, pNeighAvail, pNonZeroCount, pRefIndex, pDirect, listIdx, iIdx8,
+                                                pRefCount[listIdx], 1,
+                                                iref));
+            if ((iref < 0) || (iref >= pRefCount[listIdx]) || (pCtx->sRefPic.pRefList[listIdx][iref] == NULL)) { //error ref_idx
+              pCtx->bMbRefConcealed = true;
+              if (pCtx->pParam->eEcActiveIdc != ERROR_CON_DISABLE) {
+                iref = 0;
+                pCtx->iErrorCode |= dsBitstreamError;
+              } else {
+                return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_REF_INDEX);
+              }
+            }
+            pCtx->bMbRefConcealed = pCtx->bRPLRError || pCtx->bMbRefConcealed || ! (pCtx->sRefPic.pRefList[listIdx][iref]
+                                    && pCtx->sRefPic.pRefList[listIdx][iref]->bIsComplete);
+          }
+        }
+        UpdateP8x8RefIdxCabac (pCurDqLayer, pRefIndex, iIdx8, iref, listIdx);
+        ref_idx_list[listIdx][i] = iref;
+      }
+    }
+    //mv
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+      for (int32_t i = 0; i < 4; i++) {
+        int8_t iPartCount = pSubPartCount[i];
+        int16_t iPartIdx, iBlockW = pPartW[i];
+        uint8_t iScan4Idx, iCacheIdx;
+
+        iCacheIdx = g_kuiCache30ScanIdx[i << 2];
+
+        int8_t iref = ref_idx_list[listIdx][i];
+        pRefIndex[listIdx][iCacheIdx] = pRefIndex[listIdx][iCacheIdx + 1]
+                                        = pRefIndex[listIdx][iCacheIdx + 6] = pRefIndex[listIdx][iCacheIdx + 7] = iref;
+
+        uint32_t subMbType = pCurDqLayer->pSubMbType[iMbXy][i];
+        if (IS_DIRECT (subMbType)) {
+          continue;
+        }
+        bool is_dir = IS_DIR (subMbType, 0, listIdx) > 0;
+        for (int32_t j = 0; j < iPartCount; j++) {
+          iPartIdx = (i << 2) + j * iBlockW;
+          iScan4Idx = g_kuiScan4[iPartIdx];
+          iCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
+          if (is_dir) {
+            PredMv (pMotionVector, pRefIndex, listIdx, iPartIdx, iBlockW, iref, pMv);
+            WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 0, pMvd[0]));
+            WELS_READ_VERIFY (ParseMvdInfoCabac (pCtx, pNeighAvail, pRefIndex, pMvdCache, iPartIdx, listIdx, 1, pMvd[1]));
+            pMv[0] += pMvd[0];
+            pMv[1] += pMvd[1];
+            WELS_CHECK_SE_BOTH_WARNING (pMv[1], iMinVmv, iMaxVmv, "vertical mv");
+          } else {
+            * (uint32_t*)pMv = * (uint32_t*)pMvd = 0;
+          }
+          if (IS_SUB_8x8 (subMbType)) { //MB_TYPE_8x8
+            ST32 ((pMv + 2), LD32 (pMv));
+            ST32 ((pMvd + 2), LD32 (pMvd));
+            ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
+            ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMv));
+            ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd));
+            ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD64 (pMvd));
+            ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv));
+            ST64 (pMotionVector[listIdx][iCacheIdx + 6], LD64 (pMv));
+            ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd));
+            ST64 (pMvdCache[listIdx][iCacheIdx + 6], LD64 (pMvd));
+          } else if (IS_SUB_4x4 (subMbType)) { //MB_TYPE_4x4
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
+            ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd));
+            ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv));
+            ST32 (pMvdCache[listIdx][iCacheIdx], LD32 (pMvd));
+          } else if (IS_SUB_4x8 (subMbType)) { //MB_TYPE_4x8 5, 7, 9
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD32 (pMv));
+            ST32 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMv));
+            ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD32 (pMvd));
+            ST32 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx + 4], LD32 (pMvd));
+            ST32 (pMotionVector[listIdx][iCacheIdx], LD32 (pMv));
+            ST32 (pMotionVector[listIdx][iCacheIdx + 6], LD32 (pMv));
+            ST32 (pMvdCache[listIdx][iCacheIdx], LD32 (pMvd));
+            ST32 (pMvdCache[listIdx][iCacheIdx + 6], LD32 (pMvd));
+          } else { //MB_TYPE_8x4 4, 6, 8
+            ST32 ((pMv + 2), LD32 (pMv));
+            ST32 ((pMvd + 2), LD32 (pMvd));
+            ST64 (pCurDqLayer->pMv[listIdx][iMbXy][iScan4Idx], LD64 (pMv));
+            ST64 (pCurDqLayer->pMvd[listIdx][iMbXy][iScan4Idx], LD64 (pMvd));
+            ST64 (pMotionVector[listIdx][iCacheIdx], LD64 (pMv));
+            ST64 (pMvdCache[listIdx][iCacheIdx], LD64 (pMvd));
+          }
+        }
+      }
+    }
+  }
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode one reference index with CABAC.
+ *
+ * The context increment of the first bin is derived from the left (A) and top (B)
+ * neighbouring partitions. A neighbour counts when its reference index is greater
+ * than zero; in B slices it must additionally have a direct flag equal to zero.
+ * For z-order index 0 both neighbours are taken from the 30-entry caches, for 4 only
+ * the top one, for 8 only the left one; all remaining neighbours are read from the
+ * per-MB arrays of the current layer.
+ *
+ * \param   pCtx          decoder context (CABAC engine, CABAC contexts, current layer)
+ * \param   pNeighAvail   availability and type of the left/top macroblocks
+ * \param   nzc           not used by this function
+ * \param   ref_idx       cached reference indices, one 30-entry row per list
+ * \param   direct        cached direct flags
+ * \param   iListIdx      reference list being parsed
+ * \param   iZOrderIdx    z-order 4x4 index of the partition
+ * \param   iActiveRefNum number of active references; when 1 nothing is decoded
+ * \param   b8mode        not used by this function
+ * \param   iRefIdxVal    [out] decoded reference index
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseRefIdxCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint8_t* nzc,
+                          int8_t ref_idx[LIST_A][30], int8_t direct[30],
+                          int32_t iListIdx, int32_t iZOrderIdx, int32_t iActiveRefNum, int32_t b8mode, int8_t& iRefIdxVal) {
+  // A single active reference leaves nothing to signal.
+  if (iActiveRefNum == 1) {
+    iRefIdxVal = 0;
+    return ERR_NONE;
+  }
+
+  const int32_t kiMbXy = pCtx->pCurDqLayer->iMbXyIndex;
+  int8_t* pMbRefIdx = pCtx->pCurDqLayer->pRefIndex[iListIdx][kiMbXy];
+  int8_t* pMbDirect = pCtx->pCurDqLayer->pDirect[kiMbXy];
+  const int32_t kiCacheIdx = g_kuiCache30ScanIdx[iZOrderIdx];
+  const int32_t kiScan4Idx = g_kuiScan4[iZOrderIdx];
+
+  // Which neighbours lie outside the current MB and therefore come from the caches.
+  const bool kbTopFromCache  = (iZOrderIdx == 0 || iZOrderIdx == 4);
+  const bool kbLeftFromCache = (iZOrderIdx == 0 || iZOrderIdx == 8);
+
+  int32_t iIdxA = 0, iIdxB = 0;
+  const int8_t* pDirectA; // direct flag of the left neighbour, only dereferenced for B slices
+  const int8_t* pDirectB; // direct flag of the top neighbour, only dereferenced for B slices
+
+  if (kbTopFromCache) {
+    iIdxB = (pNeighAvail->iTopAvail && pNeighAvail->iTopType != MB_TYPE_INTRA_PCM
+             && ref_idx[iListIdx][kiCacheIdx - 6] > 0);
+    pDirectB = direct + (kiCacheIdx - 6);
+  } else {
+    iIdxB = pMbRefIdx[kiScan4Idx - 4] > 0;
+    pDirectB = pMbDirect + (kiScan4Idx - 4);
+  }
+
+  if (kbLeftFromCache) {
+    iIdxA = (pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM
+             && ref_idx[iListIdx][kiCacheIdx - 1] > 0);
+    pDirectA = direct + (kiCacheIdx - 1);
+  } else {
+    iIdxA = pMbRefIdx[kiScan4Idx - 1] > 0;
+    pDirectA = pMbDirect + (kiScan4Idx - 1);
+  }
+
+  int32_t iCtxInc;
+  if (pCtx->eSliceType == B_SLICE) {
+    // In B slices a direct-predicted neighbour does not contribute.
+    iCtxInc = 0;
+    if (iIdxB > 0 && *pDirectB == 0) {
+      iCtxInc += 2;
+    }
+    if (iIdxA > 0 && *pDirectA == 0) {
+      iCtxInc += 1;
+    }
+  } else {
+    iCtxInc = iIdxA + (iIdxB << 1);
+  }
+
+  uint32_t uiCode;
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_REF_NO + iCtxInc, uiCode));
+  if (uiCode) {
+    // Remaining bins are unary coded; the first bin already accounts for one.
+    WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_REF_NO + 4, 1, uiCode));
+    ++uiCode;
+  }
+  iRefIdxVal = (int8_t) uiCode;
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode one motion vector difference component with CABAC.
+ *
+ * The context of the first bin is chosen from the sum of the absolute cached MVD
+ * values of the top (cache index - 6) and left (cache index - 1) neighbours, each
+ * counted only when that neighbour has a non-negative reference index:
+ * sum < 3 -> 0, 3 <= sum <= 32 -> 1, sum > 32 -> 2.
+ *
+ * \param   pCtx        decoder context (CABAC engine and contexts)
+ * \param   pNeighAvail not used by this function
+ * \param   pRefIndex   cached reference indices per list
+ * \param   pMvdCache   cached MVD values per list
+ * \param   index       z-order 4x4 index of the partition
+ * \param   iListIdx    reference list being parsed
+ * \param   iMvComp     MVD component selector (indexes the last dimension of pMvdCache)
+ * \param   iMvdVal     [out] decoded signed MVD, 0 when the first bin is zero
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseMvdInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, int8_t pRefIndex[LIST_A][30],
+                           int16_t pMvdCache[LIST_A][30][2], int32_t index, int8_t iListIdx, int8_t iMvComp, int16_t& iMvdVal) {
+  PWelsCabacCtx pMvdCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_MVD + iMvComp * CTX_NUM_MVD;
+  const int32_t kiTopIdx  = g_kuiCache30ScanIdx[index] - 6;
+  const int32_t kiLeftIdx = g_kuiCache30ScanIdx[index] - 1;
+  uint32_t uiBin;
+
+  iMvdVal = 0;
+
+  // Accumulate |mvd| of the neighbours that actually reference a picture.
+  int32_t iAbsMvdSum = 0;
+  if (pRefIndex[iListIdx][kiTopIdx] >= 0) {
+    iAbsMvdSum = WELS_ABS (pMvdCache[iListIdx][kiTopIdx][iMvComp]);
+  }
+  if (pRefIndex[iListIdx][kiLeftIdx] >= 0) {
+    iAbsMvdSum += WELS_ABS (pMvdCache[iListIdx][kiLeftIdx][iMvComp]);
+  }
+
+  int32_t iCtxInc;
+  if (iAbsMvdSum < 3) {
+    iCtxInc = 0;
+  } else if (iAbsMvdSum > 32) {
+    iCtxInc = 2;
+  } else {
+    iCtxInc = 1;
+  }
+
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMvdCtx + iCtxInc, uiBin));
+  if (!uiBin) {
+    iMvdVal = 0;
+    return ERR_NONE;
+  }
+
+  // Non-zero MVD: magnitude first, then a bypass-coded sign.
+  WELS_READ_VERIFY (DecodeUEGMvCabac (pCtx->pCabacDecEngine, pMvdCtx + 3, 3, uiBin));
+  iMvdVal = (int16_t) (uiBin + 1);
+  WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiBin));
+  if (uiBin) {
+    iMvdVal = -iMvdVal;
+  }
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode the coded block pattern of the current MB with CABAC.
+ *
+ * Bits 0..3 of uiCbp are the four luma 8x8 blocks in z-order. For each of them the
+ * context increment is A + 2 * B, where A/B are 1 when the left/top neighbouring
+ * 8x8 block has its CBP bit cleared (neighbouring MBs that are unavailable or
+ * INTRA_PCM yield 0). Unless the sequence is monochrome, up to two chroma bins
+ * follow and add 16 or 32 to uiCbp.
+ *
+ * \param   pCtx        decoder context (CABAC engine, contexts, active SPS)
+ * \param   pNeighAvail availability, type and CBP of the left/top macroblocks
+ * \param   uiCbp       [out] decoded coded block pattern
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseCbpInfoCabac (PWelsDecoderContext pCtx, PWelsNeighAvail pNeighAvail, uint32_t& uiCbp) {
+  uint32_t uiBit[6];
+  int32_t iCtxInc;
+  int32_t iIdxA = 0, iIdxB = 0;
+
+  uiCbp = 0;
+
+  // Luma: a neighbouring MB only matters when it exists and is not I_PCM.
+  const bool kbTopUsable  = pNeighAvail->iTopAvail  && pNeighAvail->iTopType  != MB_TYPE_INTRA_PCM;
+  const bool kbLeftUsable = pNeighAvail->iLeftAvail && pNeighAvail->iLeftType != MB_TYPE_INTRA_PCM;
+
+  // "Zero" flags of the bordering 8x8 blocks: bottom row of the top MB, right column of the left MB.
+  const int32_t kiTopZero[2]  = {
+    kbTopUsable  && ((pNeighAvail->iTopCbp  & (1 << 2)) == 0),
+    kbTopUsable  && ((pNeighAvail->iTopCbp  & (1 << 3)) == 0)
+  };
+  const int32_t kiLeftZero[2] = {
+    kbLeftUsable && ((pNeighAvail->iLeftCbp & (1 << 1)) == 0),
+    kbLeftUsable && ((pNeighAvail->iLeftCbp & (1 << 3)) == 0)
+  };
+
+  // 8x8 block 0 (top-left): both neighbours are outside the MB
+  iCtxInc = kiLeftZero[0] + (kiTopZero[0] << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, uiBit[0]));
+  if (uiBit[0]) {
+    uiCbp |= 0x01;
+  }
+
+  // 8x8 block 1 (top-right): left neighbour is block 0
+  iIdxA = !uiBit[0];
+  iCtxInc = iIdxA + (kiTopZero[1] << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, uiBit[1]));
+  if (uiBit[1]) {
+    uiCbp |= 0x02;
+  }
+
+  // 8x8 block 2 (bottom-left): top neighbour is block 0
+  iIdxB = !uiBit[0];
+  iCtxInc = kiLeftZero[1] + (iIdxB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, uiBit[2]));
+  if (uiBit[2]) {
+    uiCbp |= 0x04;
+  }
+
+  // 8x8 block 3 (bottom-right): top neighbour is block 1, left neighbour is block 2
+  iIdxB = !uiBit[1];
+  iIdxA = !uiBit[2];
+  iCtxInc = iIdxA + (iIdxB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + iCtxInc, uiBit[3]));
+  if (uiBit[3]) {
+    uiCbp |= 0x08;
+  }
+
+  // Monochrome streams carry no chroma part.
+  if (pCtx->pSps->uiChromaFormatIdc == 0) {
+    return ERR_NONE;
+  }
+
+  // Chroma bin 0: a neighbour counts when it is I_PCM or has any chroma CBP.
+  iIdxB = pNeighAvail->iTopAvail  && (pNeighAvail->iTopType  == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp  >> 4));
+  iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4));
+  iCtxInc = iIdxA + (iIdxB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + CTX_NUM_CBP + iCtxInc,
+                                    uiBit[4]));
+  if (!uiBit[4]) {
+    return ERR_NONE;
+  }
+
+  // Chroma bin 1: a neighbour counts when it is I_PCM or its chroma CBP equals 2.
+  iIdxB = pNeighAvail->iTopAvail  && (pNeighAvail->iTopType  == MB_TYPE_INTRA_PCM || (pNeighAvail->iTopCbp  >> 4) == 2);
+  iIdxA = pNeighAvail->iLeftAvail && (pNeighAvail->iLeftType == MB_TYPE_INTRA_PCM || (pNeighAvail->iLeftCbp >> 4) == 2);
+  iCtxInc = iIdxA + (iIdxB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
+                                    pCtx->pCabacCtx + NEW_CTX_OFFSET_CBP + 2 * CTX_NUM_CBP + iCtxInc,
+                                    uiBit[5]));
+  uiCbp += 1 << (4 + uiBit[5]);
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode the QP delta of the current MB with CABAC.
+ *
+ * The first bin uses context 0 or 1 depending on whether the previously stored
+ * delta of the slice (iLastDeltaQp) was non-zero. When that bin is set, a unary
+ * code follows; with k = unary value + 1 the result is (k + 1) >> 1, negated when
+ * k is even. The decoded delta is written back to the slice as iLastDeltaQp.
+ *
+ * \param   pCtx     decoder context (CABAC engine, contexts, current layer/slice)
+ * \param   iQpDelta [out] decoded signed QP delta, 0 when the first bin is zero
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseDeltaQpCabac (PWelsDecoderContext pCtx, int32_t& iQpDelta) {
+  PSlice pCurrSlice = & (pCtx->pCurDqLayer->sLayerInfo.sSliceInLayer);
+  PWelsCabacCtx pDeltaQpCtx = pCtx->pCabacCtx + NEW_CTX_OFFSET_DELTA_QP;
+  const int32_t kiCtxInc = (pCurrSlice->iLastDeltaQp != 0);
+  uint32_t uiBin;
+
+  iQpDelta = 0;
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pDeltaQpCtx + kiCtxInc, uiBin));
+  if (uiBin != 0) {
+    WELS_READ_VERIFY (DecodeUnaryBinCabac (pCtx->pCabacDecEngine, pDeltaQpCtx + 2, 1, uiBin));
+    const uint32_t kuiMapped = uiBin + 1;
+    // Odd mapped values are positive deltas, even ones negative.
+    iQpDelta = (kuiMapped + 1) >> 1;
+    if (! (kuiMapped & 1)) {
+      iQpDelta = - iQpDelta;
+    }
+  }
+  pCurrSlice->iLastDeltaQp = iQpDelta;
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode the coded block flag of one residual block with CABAC.
+ *
+ * The context increment is nA + 2 * nB. Both default to 1 when the current MB is
+ * intra and to 0 otherwise, and are overridden when the neighbour is available:
+ *  - DC blocks (I16 luma DC, chroma DC U/V): neighbouring MB is I_PCM or has the
+ *    matching bit set in pCbfDc;
+ *  - all other blocks: neighbouring 4x4 block has a non-zero count in pNzcCache, or
+ *    the MB containing it is I_PCM (a cache value of 0xff marks "unavailable").
+ * For DC blocks a decoded flag of 1 is also recorded in pCbfDc of the current MB.
+ *
+ * \param   pNeighAvail  availability of the left/top macroblocks (used for DC blocks)
+ * \param   pNzcCache    non-zero-count cache of the current MB and its borders
+ * \param   iZIndex      4x4 block index used to address pNzcCache
+ * \param   iResProperty residual block category
+ * \param   pCtx         decoder context (CABAC engine, contexts, current layer)
+ * \param   uiCbfBit     [out] decoded coded block flag
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseCbfInfoCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNzcCache, int32_t iZIndex, int32_t iResProperty,
+                           PWelsDecoderContext pCtx, uint32_t& uiCbfBit) {
+  const int32_t kiCurMbXy = pCtx->pCurDqLayer->iMbXyIndex;
+  int32_t iTopMbXy  = kiCurMbXy - pCtx->pCurDqLayer->iMbWidth; // MB above, unless the top block is inside this MB
+  int32_t iLeftMbXy = kiCurMbXy - 1;                            // MB to the left, unless the left block is inside this MB
+  uint16_t* pCbfDc = pCtx->pCurDqLayer->pCbfDc;
+  uint32_t* pMbType = pCtx->pCurDqLayer->pMbType;
+  const bool kbIsDc = (iResProperty == I16_LUMA_DC || iResProperty == CHROMA_DC_U || iResProperty == CHROMA_DC_V);
+
+  uiCbfBit = 0;
+
+  // Default for unavailable neighbours depends on whether the current MB is intra.
+  int8_t nA = (int8_t)!!IS_INTRA (pMbType[kiCurMbXy]);
+  int8_t nB = nA;
+
+  if (kbIsDc) {
+    if (pNeighAvail->iTopAvail) {
+      nB = (pMbType[iTopMbXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iTopMbXy] >> iResProperty) & 1);
+    }
+    if (pNeighAvail->iLeftAvail) {
+      nA = (pMbType[iLeftMbXy] == MB_TYPE_INTRA_PCM) || ((pCbfDc[iLeftMbXy] >> iResProperty) & 1);
+    }
+  } else {
+    const uint8_t kuiTopNzc  = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 8];
+    const uint8_t kuiLeftNzc = pNzcCache[g_kCacheNzcScanIdx[iZIndex] - 1];
+    if (kuiTopNzc != 0xff) { // top 4x4 block available
+      if (g_kTopBlkInsideMb[iZIndex]) {
+        iTopMbXy = kiCurMbXy;
+      }
+      nB = kuiTopNzc || pMbType[iTopMbXy] == MB_TYPE_INTRA_PCM;
+    }
+    if (kuiLeftNzc != 0xff) { // left 4x4 block available
+      if (g_kLeftBlkInsideMb[iZIndex]) {
+        iLeftMbXy = kiCurMbXy;
+      }
+      nA = kuiLeftNzc || pMbType[iLeftMbXy] == MB_TYPE_INTRA_PCM;
+    }
+  }
+
+  const int32_t kiCtxInc = nA + (nB << 1);
+  WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine,
+                                    pCtx->pCabacCtx + NEW_CTX_OFFSET_CBF + g_kBlockCat2CtxOffsetCBF[iResProperty] + kiCtxInc, uiCbfBit));
+  // Remember DC flags per MB so later neighbours can look them up.
+  if (kbIsDc && uiCbfBit) {
+    pCbfDc[kiCurMbXy] |= (1 << iResProperty);
+  }
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode the significance map of one residual block with CABAC.
+ *
+ * For every scan position before the last one (g_kMaxPos[iResProperty]) a
+ * "significant" bin is read; after each significant coefficient a "last" bin is
+ * read, and when it is set all remaining entries are cleared and parsing stops.
+ * If no "last" bin was set, the final position is taken as significant without
+ * reading a bin. 8x8 luma blocks use their own context offsets and map scan
+ * positions to contexts through lookup tables.
+ *
+ * \param   pSignificantMap [out] one entry per scan position, 1 = significant, 0 = not;
+ *                          must hold g_kMaxPos[iResProperty] + 1 entries
+ * \param   iResProperty    residual block category
+ * \param   pCtx            decoder context (CABAC engine and contexts)
+ * \param   uiCoeffNum      [out] number of significant coefficients
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseSignificantMapCabac (int32_t* pSignificantMap, int32_t iResProperty, PWelsDecoderContext pCtx,
+                                  uint32_t& uiCoeffNum) {
+  const bool kbIs8x8 = (iResProperty == LUMA_DC_AC_8);
+  PWelsCabacCtx pMapCtx  = pCtx->pCabacCtx + (kbIs8x8 ? NEW_CTX_OFFSET_MAP_8x8 : NEW_CTX_OFFSET_MAP)
+                           + g_kBlockCat2CtxOffsetMap [iResProperty];
+  PWelsCabacCtx pLastCtx = pCtx->pCabacCtx + (kbIs8x8 ? NEW_CTX_OFFSET_LAST_8x8 : NEW_CTX_OFFSET_LAST)
+                           + g_kBlockCat2CtxOffsetLast[iResProperty];
+  const int32_t kiLastPos = g_kMaxPos[iResProperty];
+  uint32_t uiBin;
+
+  uiCoeffNum = 0;
+
+  for (int32_t iPos = 0; iPos < kiLastPos; ++iPos) {
+    // significant_coeff_flag
+    int32_t iCtxIdx = (kbIs8x8 ? g_kuiIdx2CtxSignificantCoeffFlag8x8[iPos] : iPos);
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pMapCtx + iCtxIdx, uiBin));
+    if (!uiBin) {
+      pSignificantMap[iPos] = 0;
+      continue;
+    }
+    pSignificantMap[iPos] = 1;
+    ++uiCoeffNum;
+
+    // last_significant_coeff_flag
+    iCtxIdx = (kbIs8x8 ? g_kuiIdx2CtxLastSignificantCoeffFlag8x8[iPos] : iPos);
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pLastCtx + iCtxIdx, uiBin));
+    if (uiBin) {
+      // Everything after this position (up to and including kiLastPos) is zero.
+      memset (pSignificantMap + iPos + 1, 0, (kiLastPos - iPos) * sizeof (int32_t));
+      return ERR_NONE;
+    }
+  }
+
+  // No "last" flag seen: the final position is significant by inference.
+  pSignificantMap[kiLastPos] = 1;
+  ++uiCoeffNum;
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Decode the levels of all significant coefficients of one residual block.
+ *
+ * The array is walked from position g_kMaxPos[iResProperty] down to 0. Every
+ * non-zero entry (set to 1 by the significance map) gets a "greater than one" bin;
+ * when that bin is set the remaining magnitude is read with DecodeUEGLevelCabac.
+ * A bypass bin then selects the sign. The two context counters follow the levels
+ * seen so far: c1 grows up to 4 while only magnitude-1 levels occurred and drops
+ * to 0 after the first larger level; c2 counts larger levels, capped at
+ * g_kMaxC2[iResProperty].
+ *
+ * \param   pSignificant  [in/out] significance map on entry, signed levels on return
+ * \param   iResProperty  residual block category
+ * \param   pCtx          decoder context (CABAC engine and contexts)
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseSignificantCoeffCabac (int32_t* pSignificant, int32_t iResProperty, PWelsDecoderContext pCtx) {
+  const bool kbIs8x8 = (iResProperty == LUMA_DC_AC_8);
+  PWelsCabacCtx pOneCtx = pCtx->pCabacCtx + (kbIs8x8 ? NEW_CTX_OFFSET_ONE_8x8 : NEW_CTX_OFFSET_ONE) +
+                          g_kBlockCat2CtxOffsetOne[iResProperty];
+  PWelsCabacCtx pAbsCtx = pCtx->pCabacCtx + (kbIs8x8 ? NEW_CTX_OFFSET_ABS_8x8 : NEW_CTX_OFFSET_ABS) +
+                          g_kBlockCat2CtxOffsetAbs[iResProperty];
+  const int16_t iMaxType = g_kMaxC2[iResProperty];
+  uint32_t uiBin;
+  int32_t c1 = 1;
+  int32_t c2 = 0;
+
+  for (int32_t iPos = g_kMaxPos[iResProperty]; iPos >= 0; --iPos) {
+    int32_t* pLevel = pSignificant + iPos;
+    if (*pLevel == 0) {
+      continue;
+    }
+
+    // coeff_abs_level greater-than-one flag
+    WELS_READ_VERIFY (DecodeBinCabac (pCtx->pCabacDecEngine, pOneCtx + c1, uiBin));
+    *pLevel += uiBin;
+    if (*pLevel == 2) {
+      // remaining magnitude
+      WELS_READ_VERIFY (DecodeUEGLevelCabac (pCtx->pCabacDecEngine, pAbsCtx + c2, uiBin));
+      *pLevel += uiBin;
+      ++c2;
+      c2 = WELS_MIN (c2, iMaxType);
+      c1 = 0;
+    } else if (c1) {
+      ++c1;
+      c1 = WELS_MIN (c1, 4);
+    }
+
+    // sign
+    WELS_READ_VERIFY (DecodeBypassCabac (pCtx->pCabacDecEngine, uiBin));
+    if (uiBin) {
+      *pLevel = - *pLevel;
+    }
+  }
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Parse and dequantise one 8x8 residual block with CABAC.
+ *
+ * No coded block flag is read for this block type: the significance map and the
+ * levels are always parsed. The coefficient count is stored in the non-zero-count
+ * cache for the four 4x4 blocks iIndex .. iIndex + 3. Dequantised coefficients are
+ * written to sTCoeff only when the (mapped) residual property is LUMA_DC_AC_8.
+ *
+ * \param   pNeighAvail        not used by this function
+ * \param   pNonZeroCountCache [out] non-zero-count cache to update
+ * \param   pBsAux             not used by this function
+ * \param   iIndex             index of the first 4x4 block covered by the 8x8 block
+ * \param   iMaxNumCoeff       not used by this function
+ * \param   pScanTable         scan position -> coefficient index table (64 entries are read)
+ * \param   iResProperty       residual block category (remapped by GetMbResProperty)
+ * \param   sTCoeff            [out] dequantised coefficients
+ * \param   uiQp               quantisation parameter
+ * \param   pCtx               decoder context
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseResidualBlockCabac8x8 (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
+                                    int32_t iIndex, int32_t iMaxNumCoeff, const uint8_t* pScanTable, int32_t iResProperty,
+                                    short* sTCoeff, /*int mb_mode*/ uint8_t uiQp, PWelsDecoderContext pCtx) {
+  int32_t pSignificantMap[64] = {0};
+  uint32_t uiTotalCoeffNum = 0;
+
+  int32_t iMbResProperty = 0;
+  GetMbResProperty (&iMbResProperty, &iResProperty, false);
+  const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? pCtx->pDequant_coeff8x8[iMbResProperty - 6][uiQp] :
+                                g_kuiDequantCoeff8x8[uiQp];
+
+  // 8x8 blocks have no coded block flag of their own; coefficients are always parsed.
+  WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum));
+  WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx));
+
+  // The count is shared by the four 4x4 blocks that make up this 8x8 block.
+  const uint8_t kuiNzc = (uint8_t)uiTotalCoeffNum;
+  pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 3]] = kuiNzc;
+  pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 2]] = kuiNzc;
+  pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex + 1]] = kuiNzc;
+  pNonZeroCountCache[g_kCacheNzcScanIdx[iIndex]] = kuiNzc;
+  if (uiTotalCoeffNum == 0) {
+    return ERR_NONE;
+  }
+
+  if (iResProperty == LUMA_DC_AC_8) {
+    for (int32_t j = 0; j < 64; ++j) {
+      if (pSignificantMap[j] == 0) {
+        continue;
+      }
+      const int32_t i = pScanTable[j];
+      if (uiQp >= 36) {
+        // large QP: scale up by a power of two
+        sTCoeff[i] = (pSignificantMap[j] * pDeQuantMul[i]) * (1 << (uiQp / 6 - 6));
+      } else {
+        // small QP: rounded right shift
+        sTCoeff[i] = (pSignificantMap[j] * pDeQuantMul[i] + (1 << (5 - uiQp / 6))) >> (6 - uiQp / 6);
+      }
+    }
+  }
+
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Parse and dequantise one 4x4 (or chroma DC) residual block with CABAC.
+ *
+ * The coded block flag is read first; only when it is set are the significance map
+ * and the levels parsed. The coefficient count is stored in the non-zero-count
+ * cache at the position of iIndex. Depending on the residual category:
+ *  - I16 luma DC: 16 levels are scattered through pScanTable and handed to
+ *    WelsLumaDcDequantIdct;
+ *  - chroma DC U/V: 4 levels are scattered, transformed by WelsChromaDcIdct and
+ *    scaled by pDeQuantMul[0] (>> 1 without scaling list, >> 5 with it);
+ *  - everything else: each non-zero level is multiplied by its dequantisation factor.
+ *
+ * \param   pNeighAvail        availability of the left/top macroblocks
+ * \param   pNonZeroCountCache [in/out] non-zero-count cache
+ * \param   pBsAux             not used by this function
+ * \param   iIndex             4x4 block index
+ * \param   iMaxNumCoeff       not used by this function
+ * \param   pScanTable         scan position -> coefficient index table
+ * \param   iResProperty       residual block category (remapped by GetMbResProperty)
+ * \param   sTCoeff            [out] dequantised coefficients
+ * \param   uiQp               quantisation parameter
+ * \param   pCtx               decoder context
+ * \return  ERR_NONE on success; a failing CABAC read is propagated by WELS_READ_VERIFY
+ */
+int32_t ParseResidualBlockCabac (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCountCache, SBitStringAux* pBsAux,
+                                 int32_t iIndex, int32_t iMaxNumCoeff,
+                                 const uint8_t* pScanTable, int32_t iResProperty, short* sTCoeff, /*int mb_mode*/ uint8_t uiQp,
+                                 PWelsDecoderContext pCtx) {
+  int32_t pSignificantMap[16] = {0};
+  uint32_t uiTotalCoeffNum = 0;
+  uint32_t uiCbpBit;
+
+  int32_t iMbResProperty = 0;
+  GetMbResProperty (&iMbResProperty, &iResProperty, false);
+  const uint16_t* pDeQuantMul = (pCtx->bUseScalingList) ? pCtx->pDequant_coeff4x4[iMbResProperty][uiQp] :
+                                g_kuiDequantCoeff[uiQp];
+
+  WELS_READ_VERIFY (ParseCbfInfoCabac (pNeighAvail, pNonZeroCountCache, iIndex, iResProperty, pCtx, uiCbpBit));
+  if (uiCbpBit) { // block carries coefficients
+    WELS_READ_VERIFY (ParseSignificantMapCabac (pSignificantMap, iResProperty, pCtx, uiTotalCoeffNum));
+    WELS_READ_VERIFY (ParseSignificantCoeffCabac (pSignificantMap, iResProperty, pCtx));
+  }
+
+  const int32_t kiCurNzCacheIdx = g_kCacheNzcScanIdx[iIndex];
+  pNonZeroCountCache[kiCurNzCacheIdx] = (uint8_t)uiTotalCoeffNum;
+  if (uiTotalCoeffNum == 0) {
+    return ERR_NONE;
+  }
+
+  int32_t j;
+  if (iResProperty == I16_LUMA_DC) {
+    for (j = 0; j < 16; ++j) {
+      sTCoeff[pScanTable[j]] = pSignificantMap[j];
+    }
+    WelsLumaDcDequantIdct (sTCoeff, uiQp, pCtx);
+  } else if (iResProperty == CHROMA_DC_U || iResProperty == CHROMA_DC_V) {
+    for (j = 0; j < 4; ++j) {
+      sTCoeff[pScanTable[j]] = pSignificantMap[j];
+    }
+    // inverse 2x2 Hadamard
+    WelsChromaDcIdct (sTCoeff);
+    // scaling: the shift depends on whether a scaling list is in use
+    const int32_t kiShift = pCtx->bUseScalingList ? 5 : 1;
+    for (j = 0; j < 4; ++j) {
+      sTCoeff[pScanTable[j]] = (int16_t) ((int64_t)sTCoeff[pScanTable[j]] * (int64_t)pDeQuantMul[0] >> kiShift);
+    }
+  } else { // luma ac, chroma ac
+    for (j = 0; j < 16; ++j) {
+      if (pSignificantMap[j] == 0) {
+        continue;
+      }
+      if (pCtx->bUseScalingList) {
+        sTCoeff[pScanTable[j]] = (int16_t) (((int64_t)pSignificantMap[j] * (int64_t)pDeQuantMul[pScanTable[j]] + 8) >> 4);
+      } else {
+        sTCoeff[pScanTable[j]] = pSignificantMap[j] * pDeQuantMul[pScanTable[j] & 0x07];
+      }
+    }
+  }
+  return ERR_NONE;
+}
+
+/*!
+ * \brief   Read one I_PCM macroblock in a CABAC coded slice.
+ *
+ * The CABAC engine state is first handed back to the bitstream reader, then 384 raw
+ * bytes (16x16 luma followed by 8x8 Cb and 8x8 Cr) are consumed. Unless the decoder
+ * runs in parse-only mode the samples are copied straight into the picture buffer.
+ * QP values of the MB are reset to 0, its non-zero counts are set to 16, and the
+ * CABAC engine is re-initialised from the bitstream behind the PCM samples.
+ *
+ * \param   pCtx  decoder context
+ * \return  ERR_NONE on success; an ERR_CABAC_NO_BS_TO_READ error when fewer than
+ *          384 bytes are left; a failing re-initialisation is propagated by
+ *          WELS_READ_VERIFY
+ */
+int32_t ParseIPCMInfoCabac (PWelsDecoderContext pCtx) {
+  PWelsCabacDecEngine pCabacDecEngine = pCtx->pCabacDecEngine;
+  SDqLayer* pCurLayer = pCtx->pCurDqLayer;
+  SBitStringAux* pBsAux = pCurLayer->pBitStringAux;
+  const int32_t iDstStrideLuma = pCurLayer->pDec->iLinesize[0];
+  const int32_t iDstStrideChroma = pCurLayer->pDec->iLinesize[1];
+  const int32_t iMbXy = pCurLayer->iMbXyIndex;
+
+  // Top-left sample of this MB in each plane.
+  const int32_t iMbOffsetLuma = (pCurLayer->iMbX + pCurLayer->iMbY * iDstStrideLuma) << 4;
+  const int32_t iMbOffsetChroma = (pCurLayer->iMbX + pCurLayer->iMbY * iDstStrideChroma) << 3;
+  uint8_t* pMbDstY = pCtx->pDec->pData[0] + iMbOffsetLuma;
+  uint8_t* pMbDstU = pCtx->pDec->pData[1] + iMbOffsetChroma;
+  uint8_t* pMbDstV = pCtx->pDec->pData[2] + iMbOffsetChroma;
+
+  pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA_PCM;
+
+  // Give the bytes held by the CABAC engine back to the bitstream reader.
+  RestoreCabacDecEngineToBS (pCabacDecEngine, pBsAux);
+  intX_t iBytesLeft = pBsAux->pEndBuf - pBsAux->pCurBuf;
+  if (iBytesLeft < 384) {
+    return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_CABAC_NO_BS_TO_READ);
+  }
+
+  if (!pCtx->pParam->bParseOnly) {
+    const uint8_t* pSrcY = pBsAux->pCurBuf;      // 16 rows of 16 bytes
+    const uint8_t* pSrcU = pSrcY + 16 * 16;      // 8 rows of 8 bytes
+    const uint8_t* pSrcV = pSrcU + 8 * 8;        // 8 rows of 8 bytes
+    int32_t iRow;
+    for (iRow = 0; iRow < 16; ++iRow) { // luma
+      memcpy (pMbDstY + iRow * iDstStrideLuma, pSrcY + iRow * 16, 16);
+    }
+    for (iRow = 0; iRow < 8; ++iRow) {  // cb
+      memcpy (pMbDstU + iRow * iDstStrideChroma, pSrcU + iRow * 8, 8);
+    }
+    for (iRow = 0; iRow < 8; ++iRow) {  // cr
+      memcpy (pMbDstV + iRow * iDstStrideChroma, pSrcV + iRow * 8, 8);
+    }
+  }
+
+  // The PCM payload is consumed whether or not it was copied.
+  pBsAux->pCurBuf += 384;
+
+  pCurLayer->pLumaQp[iMbXy] = 0;
+  pCurLayer->pChromaQp[iMbXy][0] = pCurLayer->pChromaQp[iMbXy][1] = 0;
+  memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy]));
+
+  // Restart CABAC decoding behind the PCM samples.
+  WELS_READ_VERIFY (InitReadBits (pBsAux, 1));
+  WELS_READ_VERIFY (InitCabacDecEngineFromBS (pCabacDecEngine, pBsAux));
+  return ERR_NONE;
+}
+}
--- a/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
+++ b/codec/decoder/core/src/parse_mb_syn_cavlc.cpp
@@ -252,6 +252,12 @@
   int32_t iLeftTopXy  = 0;
   int32_t iRightTopXy = 0;
 
+  PSlice pSlice = &pCurLayer->sLayerInfo.sSliceInLayer;
+  PSliceHeader pSliceHeader = &pSlice->sSliceHeaderExt.sSliceHeader;
+  int32_t listCount = 1;
+  if (pSliceHeader->eSliceType == B_SLICE) {
+    listCount = 2;
+  }
   //stuff non_zero_coeff_count from pNeighAvail(left and top)
   WelsFillCacheNonZeroCount (pNeighAvail, pNonZeroCount, pCurLayer);
 
@@ -268,113 +274,156 @@
     iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
   }
 
-  //stuff mv_cache and iRefIdxArray from left and top (inter)
-  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
-    ST32 (iMvArray[0][ 6], LD32 (pCurLayer->pMv[0][iLeftXy][ 3]));
-    ST32 (iMvArray[0][12], LD32 (pCurLayer->pMv[0][iLeftXy][ 7]));
-    ST32 (iMvArray[0][18], LD32 (pCurLayer->pMv[0][iLeftXy][11]));
-    ST32 (iMvArray[0][24], LD32 (pCurLayer->pMv[0][iLeftXy][15]));
+  for (int32_t listIdx = 0; listIdx < listCount; ++listIdx) {
+    //stuff mv_cache and iRefIdxArray from left and top (inter)
+    if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) {
+      ST32 (iMvArray[listIdx][6], LD32 (pCurLayer->pMv[listIdx][iLeftXy][3]));
+      ST32 (iMvArray[listIdx][12], LD32 (pCurLayer->pMv[listIdx][iLeftXy][7]));
+      ST32 (iMvArray[listIdx][18], LD32 (pCurLayer->pMv[listIdx][iLeftXy][11]));
+      ST32 (iMvArray[listIdx][24], LD32 (pCurLayer->pMv[listIdx][iLeftXy][15]));
 
-    ST32 (iMvdCache[0][ 6], LD32 (pCurLayer->pMvd[0][iLeftXy][ 3]));
-    ST32 (iMvdCache[0][12], LD32 (pCurLayer->pMvd[0][iLeftXy][ 7]));
-    ST32 (iMvdCache[0][18], LD32 (pCurLayer->pMvd[0][iLeftXy][11]));
-    ST32 (iMvdCache[0][24], LD32 (pCurLayer->pMvd[0][iLeftXy][15]));
+      ST32 (iMvdCache[listIdx][6], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][3]));
+      ST32 (iMvdCache[listIdx][12], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][7]));
+      ST32 (iMvdCache[listIdx][18], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][11]));
+      ST32 (iMvdCache[listIdx][24], LD32 (pCurLayer->pMvd[listIdx][iLeftXy][15]));
 
-    iRefIdxArray[0][ 6] = pCurLayer->pRefIndex[0][iLeftXy][ 3];
-    iRefIdxArray[0][12] = pCurLayer->pRefIndex[0][iLeftXy][ 7];
-    iRefIdxArray[0][18] = pCurLayer->pRefIndex[0][iLeftXy][11];
-    iRefIdxArray[0][24] = pCurLayer->pRefIndex[0][iLeftXy][15];
-  } else {
-    ST32 (iMvArray[0][ 6], 0);
-    ST32 (iMvArray[0][12], 0);
-    ST32 (iMvArray[0][18], 0);
-    ST32 (iMvArray[0][24], 0);
+      iRefIdxArray[listIdx][6] = pCurLayer->pRefIndex[listIdx][iLeftXy][3];
+      iRefIdxArray[listIdx][12] = pCurLayer->pRefIndex[listIdx][iLeftXy][7];
+      iRefIdxArray[listIdx][18] = pCurLayer->pRefIndex[listIdx][iLeftXy][11];
+      iRefIdxArray[listIdx][24] = pCurLayer->pRefIndex[listIdx][iLeftXy][15];
+    } else {
+      ST32 (iMvArray[listIdx][6], 0);
+      ST32 (iMvArray[listIdx][12], 0);
+      ST32 (iMvArray[listIdx][18], 0);
+      ST32 (iMvArray[listIdx][24], 0);
 
-    ST32 (iMvdCache[0][ 6], 0);
-    ST32 (iMvdCache[0][12], 0);
-    ST32 (iMvdCache[0][18], 0);
-    ST32 (iMvdCache[0][24], 0);
+      ST32 (iMvdCache[listIdx][6], 0);
+      ST32 (iMvdCache[listIdx][12], 0);
+      ST32 (iMvdCache[listIdx][18], 0);
+      ST32 (iMvdCache[listIdx][24], 0);
 
 
-    if (0 == pNeighAvail->iLeftAvail) { //not available
-      iRefIdxArray[0][ 6] =
-        iRefIdxArray[0][12] =
-          iRefIdxArray[0][18] =
-            iRefIdxArray[0][24] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][ 6] =
-        iRefIdxArray[0][12] =
-          iRefIdxArray[0][18] =
-            iRefIdxArray[0][24] = REF_NOT_IN_LIST;
+      if (0 == pNeighAvail->iLeftAvail) { //not available
+        iRefIdxArray[listIdx][6] =
+          iRefIdxArray[listIdx][12] =
+            iRefIdxArray[listIdx][18] =
+              iRefIdxArray[listIdx][24] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][6] =
+          iRefIdxArray[listIdx][12] =
+            iRefIdxArray[listIdx][18] =
+              iRefIdxArray[listIdx][24] = REF_NOT_IN_LIST;
+      }
     }
+    if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
+      ST32 (iMvArray[listIdx][0], LD32 (pCurLayer->pMv[listIdx][iLeftTopXy][15]));
+      ST32 (iMvdCache[listIdx][0], LD32 (pCurLayer->pMvd[listIdx][iLeftTopXy][15]));
+      iRefIdxArray[listIdx][0] = pCurLayer->pRefIndex[listIdx][iLeftTopXy][15];
+    } else {
+      ST32 (iMvArray[listIdx][0], 0);
+      ST32 (iMvdCache[listIdx][0], 0);
+      if (0 == pNeighAvail->iLeftTopAvail) { //not available
+        iRefIdxArray[listIdx][0] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][0] = REF_NOT_IN_LIST;
+      }
+    }
+
+    if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
+      ST64 (iMvArray[listIdx][1], LD64 (pCurLayer->pMv[listIdx][iTopXy][12]));
+      ST64 (iMvArray[listIdx][3], LD64 (pCurLayer->pMv[listIdx][iTopXy][14]));
+      ST64 (iMvdCache[listIdx][1], LD64 (pCurLayer->pMvd[listIdx][iTopXy][12]));
+      ST64 (iMvdCache[listIdx][3], LD64 (pCurLayer->pMvd[listIdx][iTopXy][14]));
+      ST32 (&iRefIdxArray[listIdx][1], LD32 (&pCurLayer->pRefIndex[listIdx][iTopXy][12]));
+    } else {
+      ST64 (iMvArray[listIdx][1], 0);
+      ST64 (iMvArray[listIdx][3], 0);
+      ST64 (iMvdCache[listIdx][1], 0);
+      ST64 (iMvdCache[listIdx][3], 0);
+      if (0 == pNeighAvail->iTopAvail) { //not available
+        iRefIdxArray[listIdx][1] =
+          iRefIdxArray[listIdx][2] =
+            iRefIdxArray[listIdx][3] =
+              iRefIdxArray[listIdx][4] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][1] =
+          iRefIdxArray[listIdx][2] =
+            iRefIdxArray[listIdx][3] =
+              iRefIdxArray[listIdx][4] = REF_NOT_IN_LIST;
+      }
+    }
+
+    if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
+      ST32 (iMvArray[listIdx][5], LD32 (pCurLayer->pMv[listIdx][iRightTopXy][12]));
+      ST32 (iMvdCache[listIdx][5], LD32 (pCurLayer->pMvd[listIdx][iRightTopXy][12]));
+      iRefIdxArray[listIdx][5] = pCurLayer->pRefIndex[listIdx][iRightTopXy][12];
+    } else {
+      ST32 (iMvArray[listIdx][5], 0);
+      if (0 == pNeighAvail->iRightTopAvail) { //not available
+        iRefIdxArray[listIdx][5] = REF_NOT_AVAIL;
+      } else { //available but is intra mb type
+        iRefIdxArray[listIdx][5] = REF_NOT_IN_LIST;
+      }
+    }
+
+    //right-top 4*4 block unavailable
+    ST32 (iMvArray[listIdx][9], 0);
+    ST32 (iMvArray[listIdx][21], 0);
+    ST32 (iMvArray[listIdx][11], 0);
+    ST32 (iMvArray[listIdx][17], 0);
+    ST32 (iMvArray[listIdx][23], 0);
+    ST32 (iMvdCache[listIdx][9], 0);
+    ST32 (iMvdCache[listIdx][21], 0);
+    ST32 (iMvdCache[listIdx][11], 0);
+    ST32 (iMvdCache[listIdx][17], 0);
+    ST32 (iMvdCache[listIdx][23], 0);
+    iRefIdxArray[listIdx][9] =
+      iRefIdxArray[listIdx][21] =
+        iRefIdxArray[listIdx][11] =
+          iRefIdxArray[listIdx][17] =
+            iRefIdxArray[listIdx][23] = REF_NOT_AVAIL;
   }
+}
+
+void WelsFillDirectCacheCabac (PWelsNeighAvail pNeighAvail, int8_t iDirect[30], PDqLayer pCurLayer) {
+  // Fills iDirect[30] from pCurLayer->pDirect of the left, left-top, top and right-top neighbour MBs; all other entries stay 0.
+  int32_t iCurXy = pCurLayer->iMbXyIndex;
+  int32_t iTopXy = 0;
+  int32_t iLeftXy = 0;
+  int32_t iLeftTopXy = 0;
+  int32_t iRightTopXy = 0;
+
+  if (pNeighAvail->iTopAvail) {
+    iTopXy = iCurXy - pCurLayer->iMbWidth; // neighbour index is iMbWidth entries before the current one
+  }
+  if (pNeighAvail->iLeftAvail) {
+    iLeftXy = iCurXy - 1;
+  }
+  if (pNeighAvail->iLeftTopAvail) {
+    iLeftTopXy = iCurXy - 1 - pCurLayer->iMbWidth;
+  }
+  if (pNeighAvail->iRightTopAvail) {
+    iRightTopXy = iCurXy + 1 - pCurLayer->iMbWidth;
+  }
+  memset (iDirect, 0, 30); // default for every slot; only available inter neighbours overwrite their slots below
+  if (pNeighAvail->iLeftAvail && IS_INTER (pNeighAvail->iLeftType)) { // left neighbour: entries 3, 7, 11, 15 -> slots 6, 12, 18, 24
+    iDirect[6] = pCurLayer->pDirect[iLeftXy][3];
+    iDirect[12] = pCurLayer->pDirect[iLeftXy][7];
+    iDirect[18] = pCurLayer->pDirect[iLeftXy][11];
+    iDirect[24] = pCurLayer->pDirect[iLeftXy][15];
+  }
   if (pNeighAvail->iLeftTopAvail && IS_INTER (pNeighAvail->iLeftTopType)) {
-    ST32 (iMvArray[0][0], LD32 (pCurLayer->pMv[0][iLeftTopXy][15]));
-    ST32 (iMvdCache[0][0], LD32 (pCurLayer->pMvd[0][iLeftTopXy][15]));
-    iRefIdxArray[0][0] = pCurLayer->pRefIndex[0][iLeftTopXy][15];
-  } else {
-    ST32 (iMvArray[0][0], 0);
-    ST32 (iMvdCache[0][0], 0);
-    if (0 == pNeighAvail->iLeftTopAvail) { //not available
-      iRefIdxArray[0][0] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][0] = REF_NOT_IN_LIST;
-    }
+    iDirect[0] = pCurLayer->pDirect[iLeftTopXy][15]; // left-top neighbour: entry 15 -> slot 0
   }
 
   if (pNeighAvail->iTopAvail && IS_INTER (pNeighAvail->iTopType)) {
-    ST64 (iMvArray[0][1], LD64 (pCurLayer->pMv[0][iTopXy][12]));
-    ST64 (iMvArray[0][3], LD64 (pCurLayer->pMv[0][iTopXy][14]));
-    ST64 (iMvdCache[0][1], LD64 (pCurLayer->pMvd[0][iTopXy][12]));
-    ST64 (iMvdCache[0][3], LD64 (pCurLayer->pMvd[0][iTopXy][14]));
-    ST32 (&iRefIdxArray[0][1], LD32 (&pCurLayer->pRefIndex[0][iTopXy][12]));
-  } else {
-    ST64 (iMvArray[0][1], 0);
-    ST64 (iMvArray[0][3], 0);
-    ST64 (iMvdCache[0][1], 0);
-    ST64 (iMvdCache[0][3], 0);
-    if (0 == pNeighAvail->iTopAvail) { //not available
-      iRefIdxArray[0][1] =
-        iRefIdxArray[0][2] =
-          iRefIdxArray[0][3] =
-            iRefIdxArray[0][4] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][1] =
-        iRefIdxArray[0][2] =
-          iRefIdxArray[0][3] =
-            iRefIdxArray[0][4] = REF_NOT_IN_LIST;
-    }
+    ST32 (&iDirect[1], LD32 (&pCurLayer->pDirect[iTopXy][12])); // top neighbour: presumably one 32-bit copy of entries 12..15 into slots 1..4 - confirm against ST32/LD32
   }
 
   if (pNeighAvail->iRightTopAvail && IS_INTER (pNeighAvail->iRightTopType)) {
-    ST32 (iMvArray[0][5], LD32 (pCurLayer->pMv[0][iRightTopXy][12]));
-    ST32 (iMvdCache[0][5], LD32 (pCurLayer->pMvd[0][iRightTopXy][12]));
-    iRefIdxArray[0][5] = pCurLayer->pRefIndex[0][iRightTopXy][12];
-  } else {
-    ST32 (iMvArray[0][5], 0);
-    if (0 == pNeighAvail->iRightTopAvail) { //not available
-      iRefIdxArray[0][5] = REF_NOT_AVAIL;
-    } else { //available but is intra mb type
-      iRefIdxArray[0][5] = REF_NOT_IN_LIST;
-    }
+    iDirect[5] = pCurLayer->pDirect[iRightTopXy][12]; // right-top neighbour: entry 12 -> slot 5
   }
-
   //right-top 4*4 block unavailable
-  ST32 (iMvArray[0][ 9], 0);
-  ST32 (iMvArray[0][21], 0);
-  ST32 (iMvArray[0][11], 0);
-  ST32 (iMvArray[0][17], 0);
-  ST32 (iMvArray[0][23], 0);
-  ST32 (iMvdCache[0][ 9], 0);
-  ST32 (iMvdCache[0][21], 0);
-  ST32 (iMvdCache[0][11], 0);
-  ST32 (iMvdCache[0][17], 0);
-  ST32 (iMvdCache[0][23], 0);
-  iRefIdxArray[0][ 9] =
-    iRefIdxArray[0][21] =
-      iRefIdxArray[0][11] =
-        iRefIdxArray[0][17] =
-          iRefIdxArray[0][23] = REF_NOT_AVAIL;
 }
 
 void WelsFillCacheInter (PWelsNeighAvail pNeighAvail, uint8_t* pNonZeroCount,
@@ -1052,7 +1101,7 @@
       WelsLog (& (pCtx->sLogCtx), WELS_LOG_WARNING, "inter parse: iMotionPredFlag = 1 not supported. ");
       return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_UNSUPPORTED_ILP);
     }
-    PredMv (iMvArray, iRefIdxArray, 0, 4, iRefIdx, iMv);
+    PredMv (iMvArray, iRefIdxArray, LIST_0, 0, 4, iRefIdx, iMv);
 
     WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ]
     iMv[0] += iCode;
@@ -1059,7 +1108,7 @@
     WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ]
     iMv[1] += iCode;
     WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
-    UpdateP16x16MotionInfo (pCurDqLayer, iRefIdx, iMv);
+    UpdateP16x16MotionInfo (pCurDqLayer, LIST_0, iRefIdx, iMv);
   }
   break;
   case MB_TYPE_16x8: {
@@ -1091,7 +1140,7 @@
                               && ppRefPic[iRefIdx[i]]->bIsComplete);
     }
     for (i = 0; i < 2; i++) {
-      PredInter16x8Mv (iMvArray, iRefIdxArray, i << 3, iRefIdx[i], iMv);
+      PredInter16x8Mv (iMvArray, iRefIdxArray, LIST_0, i << 3, iRefIdx[i], iMv);
 
       WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ]
       iMv[0] += iCode;
@@ -1098,7 +1147,7 @@
       WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ]
       iMv[1] += iCode;
       WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
-      UpdateP16x8MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, i << 3, iRefIdx[i], iMv);
+      UpdateP16x8MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, LIST_0, i << 3, iRefIdx[i], iMv);
     }
   }
   break;
@@ -1133,7 +1182,7 @@
 
     }
     for (i = 0; i < 2; i++) {
-      PredInter8x16Mv (iMvArray, iRefIdxArray, i << 2, iRefIdx[i], iMv);
+      PredInter8x16Mv (iMvArray, iRefIdxArray, LIST_0, i << 2, iRefIdx[i], iMv);
 
       WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ 0 ][ compIdx ]
       iMv[0] += iCode;
@@ -1140,7 +1189,7 @@
       WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l1[ mbPartIdx ][ 0 ][ compIdx ]
       iMv[1] += iCode;
       WELS_CHECK_SE_BOTH_WARNING (iMv[1], iMinVmv, iMaxVmv, "vertical mv");
-      UpdateP8x16MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, i << 2, iRefIdx[i], iMv);
+      UpdateP8x16MotionInfo (pCurDqLayer, iMvArray, iRefIdxArray, LIST_0, i << 2, iRefIdx[i], iMv);
     }
   }
   break;
@@ -1161,9 +1210,9 @@
       if (uiSubMbType >= 4) { //invalid uiSubMbType
         return GENERATE_ERROR_NO (ERR_LEVEL_MB_DATA, ERR_INFO_INVALID_SUB_MB_TYPE);
       }
-      pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterSubMbTypeInfo[uiSubMbType].iType;
-      iSubPartCount[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartCount;
-      iPartWidth[i] = g_ksInterSubMbTypeInfo[uiSubMbType].iPartWidth;
+      pCurDqLayer->pSubMbType[iMbXy][i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iType;
+      iSubPartCount[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartCount;
+      iPartWidth[i] = g_ksInterPSubMbTypeInfo[uiSubMbType].iPartWidth;
 
       // Need modification when B picture add in, reference to 7.3.5
       pCurDqLayer->pNoSubMbPartSizeLessThan8x8Flag[iMbXy] &= (uiSubMbType == 0);
@@ -1224,7 +1273,7 @@
         iPartIdx = iIdx + j * iBlockWidth;
         uiScan4Idx = g_kuiScan4[iPartIdx];
         uiCacheIdx = g_kuiCache30ScanIdx[iPartIdx];
-        PredMv (iMvArray, iRefIdxArray, iPartIdx, iBlockWidth, iRefIdx[i], iMv);
+        PredMv (iMvArray, iRefIdxArray, LIST_0, iPartIdx, iBlockWidth, iRefIdx[i], iMv);
 
         WELS_READ_VERIFY (BsGetSe (pBs, &iCode)); //mvd_l0[ mbPartIdx ][ subMbPartIdx ][ compIdx ]
         iMv[0] += iCode;
--- a/codec/decoder/core/src/pic_queue.cpp
+++ b/codec/decoder/core/src/pic_queue.cpp
@@ -108,18 +108,46 @@
   pPic->iFrameNum      = -1;
   pPic->bAvailableFlag = true;
 
+  uint32_t uiMbWidth = (kiPicWidth + 15) >> 4;
+  uint32_t uiMbHeight = (kiPicHeight + 15) >> 4;
+  uint32_t uiMbCount = uiMbWidth * uiMbHeight;
+  pPic->pMbType = (uint32_t*)pMa->WelsMallocz (uiMbCount * sizeof (uint32_t),
+                  "pPic->pMbType");
+  pPic->pMv[LIST_0] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
+                        int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]");
+  pPic->pMv[LIST_1] = (int16_t (*)[16][2])pMa->WelsMallocz (uiMbCount * sizeof (
+                        int16_t) * MV_A * MB_BLOCK4x4_NUM, "pPic->pMv[]");
+  pPic->pRefIndex[LIST_0] = (int8_t (*)[16])pMa->WelsMallocz (uiMbCount * sizeof (
+                              int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]");
+  pPic->pRefIndex[LIST_1] = (int8_t (*)[16])pMa->WelsMallocz (uiMbCount * sizeof (
+                              int8_t) * MB_BLOCK4x4_NUM, "pCtx->sMb.pRefIndex[]");
   return pPic;
 }
 
 void FreePicture (PPicture pPic, CMemoryAlign* pMa) {
   if (NULL != pPic) {
-
     if (pPic->pBuffer[0]) {
       pMa->WelsFree (pPic->pBuffer[0], "pPic->pBuffer[0]");
+      pPic->pBuffer[0] = NULL; // clear the pointer once its buffer has been released
     }
 
-    pMa->WelsFree (pPic, "pPic");
+    if (pPic->pMbType) { // release the pMbType array if one is attached to this picture
+      pMa->WelsFree (pPic->pMbType, "pPic->pMbType");
+      pPic->pMbType = NULL;
+    }
 
+    for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) { // each list index has its own pMv and pRefIndex array
+      if (pPic->pMv[listIdx]) {
+        pMa->WelsFree (pPic->pMv[listIdx], "pPic->pMv[]");
+        pPic->pMv[listIdx] = NULL;
+      }
+
+      if (pPic->pRefIndex[listIdx]) {
+        pMa->WelsFree (pPic->pRefIndex[listIdx], "pPic->pRefIndex[]");
+        pPic->pRefIndex[listIdx] = NULL;
+      }
+    }
+    pMa->WelsFree (pPic, "pPic"); // the picture struct itself goes last, after every buffer reached through it
     pPic = NULL;
   }
 }
--- a/codec/decoder/core/src/rec_mb.cpp
+++ b/codec/decoder/core/src/rec_mb.cpp
@@ -1,559 +1,1044 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file    rec_mb.c
- *
- * \brief   implementation for all macroblock decoding process after mb syntax parsing and residual decoding with cavlc.
- *
- * \date    3/18/2009 Created
- *
- *************************************************************************************
- */
-
-
-#include "rec_mb.h"
-#include "decode_slice.h"
-
-namespace WelsDec {
-
-void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer) {
-  PPicture pCurPic = pCtx->pDec;
-  int32_t iLumaStride   = pCurPic->iLinesize[0];
-  int32_t iChromaStride = pCurPic->iLinesize[1];
-  int32_t iMbX = pCurLayer->iMbX;
-  int32_t iMbY = pCurLayer->iMbY;
-
-  pCurLayer->iLumaStride = iLumaStride;
-  pCurLayer->iChromaStride = iChromaStride;
-
-  if (bOutput) {
-    pCurLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4);
-    pCurLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3);
-    pCurLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
-  }
-}
-
-int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
-  RecI8x8Luma (iMbXy, pCtx, pScoeffLevel, pDqLayer);
-  RecI4x4Chroma (iMbXy, pCtx, pScoeffLevel, pDqLayer);
-  return ERR_NONE;
-}
-
-int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
-  /*****get local variable from outer variable********/
-  /*prediction info*/
-  uint8_t* pPred = pDqLayer->pPred[0];
-
-  int32_t iLumaStride = pDqLayer->iLumaStride;
-  int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray;
-  PGetIntraPred8x8Func* pGetI8x8LumaPredFunc = pCtx->pGetI8x8LumaPredFunc;
-
-  int8_t* pIntra8x8PredMode = pDqLayer->pIntra4x4FinalMode[iMbXy]; // I_NxN
-  int16_t* pRS = pScoeffLevel;
-  /*itransform info*/
-  PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc8x8;
-
-  /*************local variable********************/
-  uint8_t i = 0;
-  bool bTLAvail[4], bTRAvail[4];
-  // Top-Right : Left : Top-Left : Top
-  bTLAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x02);
-  bTLAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
-  bTLAvail[2] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x04);
-  bTLAvail[3] = true;
-
-  bTRAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
-  bTRAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x08);
-  bTRAvail[2] = true;
-  bTRAvail[3] = false;
-
-  /*************real process*********************/
-  for (i = 0; i < 4; i++) {
-
-    uint8_t* pPredI8x8 = pPred + pBlockOffset[i << 2];
-    uint8_t uiMode = pIntra8x8PredMode[g_kuiScan4[i << 2]];
-
-    pGetI8x8LumaPredFunc[uiMode] (pPredI8x8, iLumaStride, bTLAvail[i], bTRAvail[i]);
-
-    int32_t iIndex = g_kuiMbCountScan4Idx[i << 2];
-    if (pDqLayer->pNzc[iMbXy][iIndex] || pDqLayer->pNzc[iMbXy][iIndex + 1] || pDqLayer->pNzc[iMbXy][iIndex + 4]
-        || pDqLayer->pNzc[iMbXy][iIndex + 5]) {
-      int16_t* pRSI8x8 = &pRS[i << 6];
-      pIdctResAddPredFunc (pPredI8x8, iLumaStride, pRSI8x8);
-    }
-  }
-
-  return ERR_NONE;
-}
-
-int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
-  RecI4x4Luma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
-  RecI4x4Chroma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
-  return ERR_NONE;
-}
-
-
-int32_t RecI4x4Luma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
-  /*****get local variable from outer variable********/
-  /*prediction info*/
-  uint8_t* pPred = pDqLayer->pPred[0];
-
-  int32_t iLumaStride = pDqLayer->iLumaStride;
-  int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray;
-  PGetIntraPredFunc* pGetI4x4LumaPredFunc = pCtx->pGetI4x4LumaPredFunc;
-
-  int8_t* pIntra4x4PredMode = pDqLayer->pIntra4x4FinalMode[iMBXY];
-  int16_t* pRS = pScoeffLevel;
-  /*itransform info*/
-  PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc;
-
-
-  /*************local variable********************/
-  uint8_t i = 0;
-
-  /*************real process*********************/
-  for (i = 0; i < 16; i++) {
-
-    uint8_t* pPredI4x4 = pPred + pBlockOffset[i];
-    uint8_t uiMode = pIntra4x4PredMode[g_kuiScan4[i]];
-
-    pGetI4x4LumaPredFunc[uiMode] (pPredI4x4, iLumaStride);
-
-    if (pDqLayer->pNzc[iMBXY][g_kuiMbCountScan4Idx[i]]) {
-      int16_t* pRSI4x4 = &pRS[i << 4];
-      pIdctResAddPredFunc (pPredI4x4, iLumaStride, pRSI4x4);
-    }
-  }
-
-  return ERR_NONE;
-}
-
-
-int32_t RecI4x4Chroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
-  int32_t iChromaStride = pCtx->pCurDqLayer->pDec->iLinesize[1];
-
-  int8_t iChromaPredMode = pDqLayer->pChromaPredMode[iMBXY];
-
-  PGetIntraPredFunc* pGetIChromaPredFunc = pCtx->pGetIChromaPredFunc;
-
-  uint8_t* pPred = pDqLayer->pPred[1];
-
-  pGetIChromaPredFunc[iChromaPredMode] (pPred, iChromaStride);
-  pPred = pDqLayer->pPred[2];
-  pGetIChromaPredFunc[iChromaPredMode] (pPred, iChromaStride);
-
-  RecChroma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
-
-  return ERR_NONE;
-}
-
-
-int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
-  /*decoder use, encoder no use*/
-  int8_t iI16x16PredMode = pDqLayer->pIntraPredMode[iMBXY][7];
-  int8_t iChromaPredMode = pDqLayer->pChromaPredMode[iMBXY];
-  PGetIntraPredFunc* pGetIChromaPredFunc = pCtx->pGetIChromaPredFunc;
-  PGetIntraPredFunc* pGetI16x16LumaPredFunc = pCtx->pGetI16x16LumaPredFunc;
-  int32_t iUVStride = pCtx->pCurDqLayer->pDec->iLinesize[1];
-
-  /*common use by decoder&encoder*/
-  int32_t iYStride = pDqLayer->iLumaStride;
-  int16_t* pRS = pScoeffLevel;
-
-  uint8_t* pPred = pDqLayer->pPred[0];
-
-  PIdctFourResAddPredFunc pIdctFourResAddPredFunc = pCtx->pIdctFourResAddPredFunc;
-
-  /*decode i16x16 y*/
-  pGetI16x16LumaPredFunc[iI16x16PredMode] (pPred, iYStride);
-
-  /*1 mb is divided 16 4x4_block to idct*/
-  const int8_t* pNzc = pDqLayer->pNzc[iMBXY];
-  pIdctFourResAddPredFunc (pPred + 0 * iYStride + 0, iYStride, pRS + 0 * 64, pNzc +  0);
-  pIdctFourResAddPredFunc (pPred + 0 * iYStride + 8, iYStride, pRS + 1 * 64, pNzc +  2);
-  pIdctFourResAddPredFunc (pPred + 8 * iYStride + 0, iYStride, pRS + 2 * 64, pNzc +  8);
-  pIdctFourResAddPredFunc (pPred + 8 * iYStride + 8, iYStride, pRS + 3 * 64, pNzc + 10);
-
-  /*decode intra mb cb&cr*/
-  pPred = pDqLayer->pPred[1];
-  pGetIChromaPredFunc[iChromaPredMode] (pPred, iUVStride);
-  pPred = pDqLayer->pPred[2];
-  pGetIChromaPredFunc[iChromaPredMode] (pPred, iUVStride);
-  RecChroma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
-
-  return ERR_NONE;
-}
-
-
-//according to current 8*8 block ref_index to gain reference picture
-static inline void GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, int8_t* pRefIdxList,
-                              int32_t iIndex) {
-  PPicture pRefPic;
-
-  int8_t iRefIdx = pRefIdxList[iIndex];
-  pRefPic = pCtx->sRefPic.pRefList[LIST_0][iRefIdx];
-
-  pMCRefMem->iSrcLineLuma   = pRefPic->iLinesize[0];
-  pMCRefMem->iSrcLineChroma = pRefPic->iLinesize[1];
-
-  pMCRefMem->pSrcY = pRefPic->pData[0];
-  pMCRefMem->pSrcU = pRefPic->pData[1];
-  pMCRefMem->pSrcV = pRefPic->pData[2];
-}
-
-
-#ifndef MC_FLOW_SIMPLE_JUDGE
-#define MC_FLOW_SIMPLE_JUDGE 1
-#endif //MC_FLOW_SIMPLE_JUDGE
-void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
-             int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]) {
-  int32_t iFullMVx = (iXOffset << 2) + iMVs[0]; //quarter pixel
-  int32_t iFullMVy = (iYOffset << 2) + iMVs[1];
-  iFullMVx = WELS_CLIP3 (iFullMVx, ((-PADDING_LENGTH + 2) * (1 << 2)),
-                         ((pMCRefMem->iPicWidth + PADDING_LENGTH - 19) * (1 << 2)));
-  iFullMVy = WELS_CLIP3 (iFullMVy, ((-PADDING_LENGTH + 2) * (1 << 2)),
-                         ((pMCRefMem->iPicHeight + PADDING_LENGTH - 19) * (1 << 2)));
-
-  int32_t iSrcPixOffsetLuma = (iFullMVx >> 2) + (iFullMVy >> 2) * pMCRefMem->iSrcLineLuma;
-  int32_t iSrcPixOffsetChroma = (iFullMVx >> 3) + (iFullMVy >> 3) * pMCRefMem->iSrcLineChroma;
-
-  int32_t iBlkWidthChroma = iBlkWidth >> 1;
-  int32_t iBlkHeightChroma = iBlkHeight >> 1;
-
-  uint8_t* pSrcY = pMCRefMem->pSrcY + iSrcPixOffsetLuma;
-  uint8_t* pSrcU = pMCRefMem->pSrcU + iSrcPixOffsetChroma;
-  uint8_t* pSrcV = pMCRefMem->pSrcV + iSrcPixOffsetChroma;
-  uint8_t* pDstY = pMCRefMem->pDstY;
-  uint8_t* pDstU = pMCRefMem->pDstU;
-  uint8_t* pDstV = pMCRefMem->pDstV;
-
-  pMCFunc->pMcLumaFunc (pSrcY, pMCRefMem->iSrcLineLuma, pDstY, pMCRefMem->iDstLineLuma, iFullMVx, iFullMVy, iBlkWidth,
-                        iBlkHeight);
-  pMCFunc->pMcChromaFunc (pSrcU, pMCRefMem->iSrcLineChroma, pDstU, pMCRefMem->iDstLineChroma, iFullMVx, iFullMVy,
-                          iBlkWidthChroma, iBlkHeightChroma);
-  pMCFunc->pMcChromaFunc (pSrcV, pMCRefMem->iSrcLineChroma, pDstV, pMCRefMem->iDstLineChroma, iFullMVx, iFullMVy,
-                          iBlkWidthChroma, iBlkHeightChroma);
-
-}
-
-void WeightPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, int32_t iRefIdx, int32_t iBlkWidth,
-                       int32_t iBlkHeight) {
-
-
-  int32_t iLog2denom, iWoc, iOoc;
-  int32_t iPredTemp, iLineStride;
-  int32_t iPixel = 0;
-  uint8_t* pDst;
-  //luma
-  iLog2denom = pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom;
-  iWoc = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iLumaWeight[iRefIdx];
-  iOoc = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iLumaOffset[iRefIdx];
-  iLineStride = pMCRefMem->iDstLineLuma;
-
-  for (int i = 0; i < iBlkHeight; i++) {
-    for (int j = 0; j < iBlkWidth; j++) {
-      iPixel = j + i * (iLineStride);
-      if (iLog2denom >= 1) {
-        iPredTemp = ((pMCRefMem->pDstY[iPixel] * iWoc + (1 << (iLog2denom - 1))) >> iLog2denom) + iOoc;
-
-        pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
-      } else {
-        iPredTemp = pMCRefMem->pDstY[iPixel] * iWoc + iOoc;
-
-        pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
-
-      }
-    }
-  }
-
-
-  //UV
-  iBlkWidth = iBlkWidth >> 1;
-  iBlkHeight = iBlkHeight >> 1;
-  iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom;
-  iLineStride = pMCRefMem->iDstLineChroma;
-
-  for (int i = 0; i < 2; i++) {
-
-
-    //iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom;
-    iWoc =  pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iChromaWeight[iRefIdx][i];
-    iOoc = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iChromaOffset[iRefIdx][i];
-    pDst = i ? pMCRefMem->pDstV : pMCRefMem->pDstU;
-    //iLineStride = pMCRefMem->iDstLineChroma;
-
-    for (int i = 0; i < iBlkHeight ; i++) {
-      for (int j = 0; j < iBlkWidth; j++) {
-        iPixel = j + i * (iLineStride);
-        if (iLog2denom >= 1) {
-          iPredTemp = ((pDst[iPixel] * iWoc + (1 << (iLog2denom - 1))) >> iLog2denom) + iOoc;
-
-          pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
-        } else {
-          iPredTemp = pDst[iPixel] * iWoc + iOoc;
-
-          pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
-
-        }
-      }
-
-    }
-
-
-  }
-}
-
-
-void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx) {
-  sMCRefMember pMCRefMem;
-  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
-  SMcFunc* pMCFunc = &pCtx->sMcFunc;
-
-  int32_t iMBXY = pCurDqLayer->iMbXyIndex;
-
-  int16_t iMVs[2] = {0};
-
-  int32_t iMBType = pCurDqLayer->pMbType[iMBXY];
-
-  int32_t iMBOffsetX = pCurDqLayer->iMbX << 4;
-  int32_t iMBOffsetY = pCurDqLayer->iMbY << 4;
-
-  int32_t iDstLineLuma   = pCtx->pDec->iLinesize[0];
-  int32_t iDstLineChroma = pCtx->pDec->iLinesize[1];
-
-  int32_t iBlk8X, iBlk8Y, iBlk4X, iBlk4Y, i, j, iIIdx, iJIdx;
-
-  pMCRefMem.iPicWidth = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbWidth << 4);
-  pMCRefMem.iPicHeight = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbHeight << 4);
-
-  pMCRefMem.pDstY = pPredY;
-  pMCRefMem.pDstU = pPredCb;
-  pMCRefMem.pDstV = pPredCr;
-
-  pMCRefMem.iDstLineLuma   = iDstLineLuma;
-  pMCRefMem.iDstLineChroma = iDstLineChroma;
-
-  int32_t iRefIndex = 0;
-
-  switch (iMBType) {
-  case MB_TYPE_SKIP:
-  case MB_TYPE_16x16:
-    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
-    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
-    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0);
-    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
-
-    if (pCurDqLayer->bUseWeightPredictionFlag) {
-      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
-      WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 16, 16);
-    }
-    break;
-  case MB_TYPE_16x8:
-    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
-    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
-    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0);
-    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs);
-
-    if (pCurDqLayer->bUseWeightPredictionFlag) {
-      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
-      WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 16, 8);
-    }
-
-    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][8][0];
-    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][8][1];
-    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 8);
-    pMCRefMem.pDstY = pPredY  + (iDstLineLuma << 3);
-    pMCRefMem.pDstU = pPredCb + (iDstLineChroma << 2);
-    pMCRefMem.pDstV = pPredCr + (iDstLineChroma << 2);
-    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs);
-
-    if (pCurDqLayer->bUseWeightPredictionFlag) {
-      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][8];
-      WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 16, 8);
-    }
-    break;
-  case MB_TYPE_8x16:
-    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
-    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
-    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0);
-    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs);
-    if (pCurDqLayer->bUseWeightPredictionFlag) {
-      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
-      WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 8, 16);
-    }
-
-    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][2][0];
-    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][2][1];
-    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 2);
-    pMCRefMem.pDstY = pPredY + 8;
-    pMCRefMem.pDstU = pPredCb + 4;
-    pMCRefMem.pDstV = pPredCr + 4;
-    BaseMC (&pMCRefMem, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs);
-
-    if (pCurDqLayer->bUseWeightPredictionFlag) {
-      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][2];
-      WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 8, 16);
-    }
-    break;
-  case MB_TYPE_8x8:
-  case MB_TYPE_8x8_REF0: {
-    uint32_t iSubMBType;
-    int32_t iXOffset, iYOffset;
-    uint8_t* pDstY, *pDstU, *pDstV;
-    for (i = 0; i < 4; i++) {
-      iSubMBType = pCurDqLayer->pSubMbType[iMBXY][i];
-      iBlk8X = (i & 1) << 3;
-      iBlk8Y = (i >> 1) << 3;
-      iXOffset = iMBOffsetX + iBlk8X;
-      iYOffset = iMBOffsetY + iBlk8Y;
-
-      iIIdx = ((i >> 1) << 3) + ((i & 1) << 1);
-      GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], iIIdx);
-      iRefIndex = pCurDqLayer->bUseWeightPredictionFlag ? pCurDqLayer->pRefIndex[0][iMBXY][iIIdx] : 0;
-
-      pDstY = pPredY + iBlk8X + iBlk8Y * iDstLineLuma;
-      pDstU = pPredCb + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
-      pDstV = pPredCr + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
-      pMCRefMem.pDstY = pDstY;
-      pMCRefMem.pDstU = pDstU;
-      pMCRefMem.pDstV = pDstV;
-      switch (iSubMBType) {
-      case SUB_MB_TYPE_8x8:
-        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
-        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
-        BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
-        if (pCurDqLayer->bUseWeightPredictionFlag) {
-
-          WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 8, 8);
-        }
-
-        break;
-      case SUB_MB_TYPE_8x4:
-        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
-        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
-        BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
-        if (pCurDqLayer->bUseWeightPredictionFlag) {
-
-          WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 8, 4);
-        }
-
-
-        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][0];
-        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][1];
-        pMCRefMem.pDstY += (iDstLineLuma << 2);
-        pMCRefMem.pDstU += (iDstLineChroma << 1);
-        pMCRefMem.pDstV += (iDstLineChroma << 1);
-        BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
-        if (pCurDqLayer->bUseWeightPredictionFlag) {
-
-          WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 8, 4);
-        }
-
-        break;
-      case SUB_MB_TYPE_4x8:
-        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
-        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
-        BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
-        if (pCurDqLayer->bUseWeightPredictionFlag) {
-
-          WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 4, 8);
-        }
-
-
-        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][0];
-        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][1];
-        pMCRefMem.pDstY += 4;
-        pMCRefMem.pDstU += 2;
-        pMCRefMem.pDstV += 2;
-        BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
-        if (pCurDqLayer->bUseWeightPredictionFlag) {
-
-          WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 4, 8);
-        }
-
-        break;
-      case SUB_MB_TYPE_4x4: {
-        for (j = 0; j < 4; j++) {
-          int32_t iUVLineStride;
-          iJIdx = ((j >> 1) << 2) + (j & 1);
-
-          iBlk4X = (j & 1) << 2;
-          iBlk4Y = (j >> 1) << 2;
-
-          iUVLineStride = (iBlk4X >> 1) + (iBlk4Y >> 1) * iDstLineChroma;
-          pMCRefMem.pDstY = pDstY + iBlk4X + iBlk4Y * iDstLineLuma;
-          pMCRefMem.pDstU = pDstU + iUVLineStride;
-          pMCRefMem.pDstV = pDstV + iUVLineStride;
-
-          iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][0];
-          iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][1];
-          BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
-          if (pCurDqLayer->bUseWeightPredictionFlag) {
-
-            WeightPrediction (pCurDqLayer, &pMCRefMem, iRefIndex, 4, 4);
-          }
-
-        }
-      }
-      break;
-      default:
-        break;
-      }
-    }
-  }
-  break;
-  default:
-    break;
-  }
-}
-
-int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
-  int32_t iChromaStride = pCtx->pCurDqLayer->pDec->iLinesize[1];
-  PIdctFourResAddPredFunc pIdctFourResAddPredFunc = pCtx->pIdctFourResAddPredFunc;
-
-  uint8_t i = 0;
-  uint8_t uiCbpC = pDqLayer->pCbp[iMBXY] >> 4;
-
-  if (1 == uiCbpC || 2 == uiCbpC) {
-    for (i = 0; i < 2; i++) {
-      int16_t* pRS = pScoeffLevel + 256 + (i << 6);
-      uint8_t* pPred = pDqLayer->pPred[i + 1];
-      const int8_t* pNzc = pDqLayer->pNzc[iMBXY] + 16 + 2 * i;
-
-      /*1 chroma is divided 4 4x4_block to idct*/
-      pIdctFourResAddPredFunc (pPred, iChromaStride, pRS, pNzc);
-    }
-  }
-
-  return ERR_NONE;
-}
-
-} // namespace WelsDec
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * \file    rec_mb.c
+ *
+ * \brief   implementation for all macroblock decoding process after mb syntax parsing and residual decoding with cavlc.
+ *
+ * \date    3/18/2009 Created
+ *
+ *************************************************************************************
+ */
+
+
+#include "rec_mb.h"
+#include "decode_slice.h"
+
+namespace WelsDec {
+
+void WelsFillRecNeededMbInfo (PWelsDecoderContext pCtx, bool bOutput, PDqLayer pCurLayer) { // set up strides / MB pointers used by reconstruction
+  PPicture pCurPic = pCtx->pDec; // picture currently being decoded
+  int32_t iLumaStride   = pCurPic->iLinesize[0];
+  int32_t iChromaStride = pCurPic->iLinesize[1];
+  int32_t iMbX = pCurLayer->iMbX;
+  int32_t iMbY = pCurLayer->iMbY;
+  // Cache the picture strides in the layer so later stages can read them from pCurLayer.
+  pCurLayer->iLumaStride = iLumaStride;
+  pCurLayer->iChromaStride = iChromaStride;
+  // Only when bOutput is set: point pPred[] at this macroblock's top-left sample in each plane.
+  if (bOutput) {
+    pCurLayer->pPred[0] = pCurPic->pData[0] + ((iMbY * iLumaStride + iMbX) << 4); // luma: 16 rows / 16 columns per MB
+    pCurLayer->pPred[1] = pCurPic->pData[1] + ((iMbY * iChromaStride + iMbX) << 3); // chroma: 8 rows / 8 columns per MB
+    pCurLayer->pPred[2] = pCurPic->pData[2] + ((iMbY * iChromaStride + iMbX) << 3);
+  }
+}
+
+int32_t RecI8x8Mb (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { // intra 8x8 MB: luma, then chroma
+  RecI8x8Luma (iMbXy, pCtx, pScoeffLevel, pDqLayer); // return value is not checked here
+  RecI4x4Chroma (iMbXy, pCtx, pScoeffLevel, pDqLayer); // chroma goes through the same routine RecI4x4Mb uses
+  return ERR_NONE; // always reports success
+}
+
+int32_t RecI8x8Luma (int32_t iMbXy, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
+  /* Reconstructs the four 8x8 luma blocks of macroblock iMbXy in place in pDqLayer->pPred[0]: predict, then add residual. */
+  /* prediction info */
+  uint8_t* pPred = pDqLayer->pPred[0];
+
+  int32_t iLumaStride = pDqLayer->iLumaStride;
+  int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray;
+  PGetIntraPred8x8Func* pGetI8x8LumaPredFunc = pCtx->pGetI8x8LumaPredFunc;
+
+  int8_t* pIntra8x8PredMode = pDqLayer->pIntra4x4FinalMode[iMbXy]; // I_NxN
+  int16_t* pRS = pScoeffLevel;
+  /* inverse-transform info: the 8x8 variant of the residual-add function */
+  PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc8x8;
+
+  /* per-8x8-block flags, passed as the 3rd / 4th arguments of the prediction function */
+  uint8_t i = 0;
+  bool bTLAvail[4], bTRAvail[4];
+  // pIntraNxNAvailFlag bits, MSB to LSB, per the original note: Top-Right : Left : Top-Left : Top
+  bTLAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x02);
+  bTLAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
+  bTLAvail[2] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x04);
+  bTLAvail[3] = true;
+
+  bTRAvail[0] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x01);
+  bTRAvail[1] = !! (pDqLayer->pIntraNxNAvailFlag[iMbXy] & 0x08);
+  bTRAvail[2] = true;
+  bTRAvail[3] = false;
+
+  /* main loop: one iteration per 8x8 block */
+  for (i = 0; i < 4; i++) {
+    // Offset and mode are looked up at the 4x4 index i * 4 of each table.
+    uint8_t* pPredI8x8 = pPred + pBlockOffset[i << 2];
+    uint8_t uiMode = pIntra8x8PredMode[g_kuiScan4[i << 2]];
+
+    pGetI8x8LumaPredFunc[uiMode] (pPredI8x8, iLumaStride, bTLAvail[i], bTRAvail[i]);
+    // Add the residual only if one of the four pNzc entries checked for this block is non-zero.
+    int32_t iIndex = g_kuiMbCountScan4Idx[i << 2];
+    if (pDqLayer->pNzc[iMbXy][iIndex] || pDqLayer->pNzc[iMbXy][iIndex + 1] || pDqLayer->pNzc[iMbXy][iIndex + 4]
+        || pDqLayer->pNzc[iMbXy][iIndex + 5]) {
+      int16_t* pRSI8x8 = &pRS[i << 6]; // 64 coefficients per 8x8 block
+      pIdctResAddPredFunc (pPredI8x8, iLumaStride, pRSI8x8);
+    }
+  }
+
+  return ERR_NONE; // always reports success
+}
+
+int32_t RecI4x4Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { // intra 4x4 MB: luma, then chroma
+  RecI4x4Luma (iMBXY, pCtx, pScoeffLevel, pDqLayer); // return value is not checked here
+  RecI4x4Chroma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
+  return ERR_NONE; // always reports success
+}
+
+
+int32_t RecI4x4Luma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
+  /* Reconstructs the sixteen 4x4 luma blocks of macroblock iMBXY in place in pDqLayer->pPred[0]: predict, then add residual. */
+  /* prediction info */
+  uint8_t* pPred = pDqLayer->pPred[0];
+
+  int32_t iLumaStride = pDqLayer->iLumaStride;
+  int32_t* pBlockOffset = pCtx->iDecBlockOffsetArray;
+  PGetIntraPredFunc* pGetI4x4LumaPredFunc = pCtx->pGetI4x4LumaPredFunc;
+
+  int8_t* pIntra4x4PredMode = pDqLayer->pIntra4x4FinalMode[iMBXY];
+  int16_t* pRS = pScoeffLevel;
+  /* inverse-transform info */
+  PIdctResAddPredFunc pIdctResAddPredFunc = pCtx->pIdctResAddPredFunc;
+
+
+  /* loop counter */
+  uint8_t i = 0;
+
+  /* main loop: one iteration per 4x4 block */
+  for (i = 0; i < 16; i++) {
+    // Block position comes from pBlockOffset[i]; its mode is read through the g_kuiScan4 mapping.
+    uint8_t* pPredI4x4 = pPred + pBlockOffset[i];
+    uint8_t uiMode = pIntra4x4PredMode[g_kuiScan4[i]];
+
+    pGetI4x4LumaPredFunc[uiMode] (pPredI4x4, iLumaStride);
+    // Add the residual only when the pNzc entry mapped to this block is non-zero.
+    if (pDqLayer->pNzc[iMBXY][g_kuiMbCountScan4Idx[i]]) {
+      int16_t* pRSI4x4 = &pRS[i << 4]; // 16 coefficients per 4x4 block
+      pIdctResAddPredFunc (pPredI4x4, iLumaStride, pRSI4x4);
+    }
+  }
+
+  return ERR_NONE; // always reports success
+}
+
+
+int32_t RecI4x4Chroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { // intra chroma: predict both planes, then add residual
+  int32_t iChromaStride = pCtx->pCurDqLayer->pDec->iLinesize[1]; // stride is read via pCtx->pCurDqLayer, not via the pDqLayer argument
+
+  int8_t iChromaPredMode = pDqLayer->pChromaPredMode[iMBXY];
+
+  PGetIntraPredFunc* pGetIChromaPredFunc = pCtx->pGetIChromaPredFunc;
+
+  uint8_t* pPred = pDqLayer->pPred[1];
+  // The same prediction mode is applied to pPred[1] and then to pPred[2].
+  pGetIChromaPredFunc[iChromaPredMode] (pPred, iChromaStride);
+  pPred = pDqLayer->pPred[2];
+  pGetIChromaPredFunc[iChromaPredMode] (pPred, iChromaStride);
+  // Residual handling is delegated to RecChroma; its return value is not checked.
+  RecChroma (iMBXY, pCtx, pScoeffLevel, pDqLayer);
+
+  return ERR_NONE; // always reports success
+}
+
+
+int32_t RecI16x16Mb (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) { // intra 16x16 MB: luma, then chroma
+  /* prediction modes and predictor tables (original note: used by the decoder, not the encoder) */
+  int8_t iI16x16PredMode = pDqLayer->pIntraPredMode[iMBXY][7]; // the 16x16 mode is read from entry 7
+  int8_t iChromaPredMode = pDqLayer->pChromaPredMode[iMBXY];
+  PGetIntraPredFunc* pGetIChromaPredFunc = pCtx->pGetIChromaPredFunc;
+  PGetIntraPredFunc* pGetI16x16LumaPredFunc = pCtx->pGetI16x16LumaPredFunc;
+  int32_t iUVStride = pCtx->pCurDqLayer->pDec->iLinesize[1]; // read via pCtx->pCurDqLayer, not via the pDqLayer argument
+
+  /* original note: common use by decoder & encoder */
+  int32_t iYStride = pDqLayer->iLumaStride;
+  int16_t* pRS = pScoeffLevel;
+
+  uint8_t* pPred = pDqLayer->pPred[0];
+
+  PIdctFourResAddPredFunc pIdctFourResAddPredFunc = pCtx->pIdctFourResAddPredFunc;
+
+  /* luma: predict the whole 16x16 block in place */
+  pGetI16x16LumaPredFunc[iI16x16PredMode] (pPred, iYStride);
+
+  /* luma residual: one call per 8x8 quadrant, 64 coefficients each; pNzc offsets 0, 2, 8 and 10 select the quadrant's entries */
+  const int8_t* pNzc = pDqLayer->pNzc[iMBXY];
+  pIdctFourResAddPredFunc (pPred + 0 * iYStride + 0, iYStride, pRS + 0 * 64, pNzc +  0);
+  pIdctFourResAddPredFunc (pPred + 0 * iYStride + 8, iYStride, pRS + 1 * 64, pNzc +  2);
+  pIdctFourResAddPredFunc (pPred + 8 * iYStride + 0, iYStride, pRS + 2 * 64, pNzc +  8);
+  pIdctFourResAddPredFunc (pPred + 8 * iYStride + 8, iYStride, pRS + 3 * 64, pNzc + 10);
+
+  /* chroma: predict pPred[1] and pPred[2] with the same mode, then let RecChroma add the residual */
+  pPred = pDqLayer->pPred[1];
+  pGetIChromaPredFunc[iChromaPredMode] (pPred, iUVStride);
+  pPred = pDqLayer->pPred[2];
+  pGetIChromaPredFunc[iChromaPredMode] (pPred, iUVStride);
+  RecChroma (iMBXY, pCtx, pScoeffLevel, pDqLayer); // return value is not checked
+
+  return ERR_NONE; // always reports success
+}
+
+
+// Looks up the reference picture selected by pRefIdxList[iIndex] in list listIdx and copies its strides and plane pointers into pMCRefMem.
+static inline void GetRefPic (sMCRefMember* pMCRefMem, PWelsDecoderContext pCtx, int8_t* pRefIdxList,
+                              int32_t iIndex, int32_t listIdx) {
+  PPicture pRefPic;
+  // NOTE(review): if the index is negative or the list entry is NULL, pMCRefMem's source fields are left untouched and nothing is reported to the caller.
+  int8_t iRefIdx = pRefIdxList[iIndex];
+  if (iRefIdx >= 0) {
+    pRefPic = pCtx->sRefPic.pRefList[listIdx][iRefIdx];
+
+    if (pRefPic != NULL) {
+      pMCRefMem->iSrcLineLuma = pRefPic->iLinesize[0];
+      pMCRefMem->iSrcLineChroma = pRefPic->iLinesize[1];
+
+      pMCRefMem->pSrcY = pRefPic->pData[0];
+      pMCRefMem->pSrcU = pRefPic->pData[1];
+      pMCRefMem->pSrcV = pRefPic->pData[2];
+    }
+  }
+}
+
+
+#ifndef MC_FLOW_SIMPLE_JUDGE
+#define MC_FLOW_SIMPLE_JUDGE 1
+#endif //MC_FLOW_SIMPLE_JUDGE
+void BaseMC (sMCRefMember* pMCRefMem, int32_t iXOffset, int32_t iYOffset, SMcFunc* pMCFunc,
+             int32_t iBlkWidth, int32_t iBlkHeight, int16_t iMVs[2]) { // motion-compensates one block (Y, U, V) from pSrc* into pDst*
+  int32_t iFullMVx = (iXOffset << 2) + iMVs[0]; //quarter pixel
+  int32_t iFullMVy = (iYOffset << 2) + iMVs[1]; // block position (scaled by 4) plus motion vector
+  iFullMVx = WELS_CLIP3 (iFullMVx, ((-PADDING_LENGTH + 2) * (1 << 2)), // clamp to a window derived from PADDING_LENGTH and the picture width
+                         ((pMCRefMem->iPicWidth + PADDING_LENGTH - 19) * (1 << 2)));
+  iFullMVy = WELS_CLIP3 (iFullMVy, ((-PADDING_LENGTH + 2) * (1 << 2)), // same clamp vertically, using the picture height
+                         ((pMCRefMem->iPicHeight + PADDING_LENGTH - 19) * (1 << 2)));
+  // Integer sample offsets: >> 2 for luma; chroma uses >> 3 of the same position.
+  int32_t iSrcPixOffsetLuma = (iFullMVx >> 2) + (iFullMVy >> 2) * pMCRefMem->iSrcLineLuma;
+  int32_t iSrcPixOffsetChroma = (iFullMVx >> 3) + (iFullMVy >> 3) * pMCRefMem->iSrcLineChroma;
+  // Chroma blocks are half the luma block size in each dimension.
+  int32_t iBlkWidthChroma = iBlkWidth >> 1;
+  int32_t iBlkHeightChroma = iBlkHeight >> 1;
+
+  uint8_t* pSrcY = pMCRefMem->pSrcY + iSrcPixOffsetLuma;
+  uint8_t* pSrcU = pMCRefMem->pSrcU + iSrcPixOffsetChroma;
+  uint8_t* pSrcV = pMCRefMem->pSrcV + iSrcPixOffsetChroma;
+  uint8_t* pDstY = pMCRefMem->pDstY;
+  uint8_t* pDstU = pMCRefMem->pDstU;
+  uint8_t* pDstV = pMCRefMem->pDstV;
+  // The full clamped position is also handed to the MC functions; presumably they use its fractional bits for interpolation - confirm in SMcFunc.
+  pMCFunc->pMcLumaFunc (pSrcY, pMCRefMem->iSrcLineLuma, pDstY, pMCRefMem->iDstLineLuma, iFullMVx, iFullMVy, iBlkWidth,
+                        iBlkHeight);
+  pMCFunc->pMcChromaFunc (pSrcU, pMCRefMem->iSrcLineChroma, pDstU, pMCRefMem->iDstLineChroma, iFullMVx, iFullMVy,
+                          iBlkWidthChroma, iBlkHeightChroma);
+  pMCFunc->pMcChromaFunc (pSrcV, pMCRefMem->iSrcLineChroma, pDstV, pMCRefMem->iDstLineChroma, iFullMVx, iFullMVy,
+                          iBlkWidthChroma, iBlkHeightChroma);
+
+}
+
+static void WeightPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, int32_t listIdx, int32_t iRefIdx,
+                              int32_t iBlkWidth,
+                              int32_t iBlkHeight) {
+  // Applies single-list weighted prediction in place to the Y, U and V destination blocks of pMCRefMem.
+  // Weights and offsets come from pPredWeightTable->sPredList[listIdx], entry iRefIdx; block size is in luma samples.
+  int32_t iLog2denom, iWoc, iOoc;
+  int32_t iPredTemp, iLineStride;
+  int32_t iPixel = 0;
+  uint8_t* pDst;
+  //luma
+  iLog2denom = pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom;
+  iWoc = pCurDqLayer->pPredWeightTable->sPredList[listIdx].iLumaWeight[iRefIdx];
+  iOoc = pCurDqLayer->pPredWeightTable->sPredList[listIdx].iLumaOffset[iRefIdx];
+  iLineStride = pMCRefMem->iDstLineLuma;
+
+  for (int i = 0; i < iBlkHeight; i++) {
+    for (int j = 0; j < iBlkWidth; j++) {
+      iPixel = j + i * (iLineStride);
+      if (iLog2denom >= 1) {
+        iPredTemp = ((pMCRefMem->pDstY[iPixel] * iWoc + (1 << (iLog2denom - 1))) >> iLog2denom) + iOoc;
+        // rounded weighted sample plus offset, clamped to 8 bits
+        pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+      } else {
+        iPredTemp = pMCRefMem->pDstY[iPixel] * iWoc + iOoc;
+        // denominator is 0: no shift and no rounding term
+        pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+
+      }
+    }
+  }
+
+
+  //UV
+  iBlkWidth = iBlkWidth >> 1;
+  iBlkHeight = iBlkHeight >> 1;
+  iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom;
+  iLineStride = pMCRefMem->iDstLineChroma;
+
+  for (int k = 0; k < 2; k++) {
+    // k selects the chroma plane (0: U, 1: V). It is named k, as in BiWeightPrediction,
+    // so that the row counter i below no longer shadows it.
+    // The chroma denominator is shared by both planes and was loaded above.
+    iWoc =  pCurDqLayer->pPredWeightTable->sPredList[listIdx].iChromaWeight[iRefIdx][k];
+    iOoc = pCurDqLayer->pPredWeightTable->sPredList[listIdx].iChromaOffset[iRefIdx][k];
+    pDst = k ? pMCRefMem->pDstV : pMCRefMem->pDstU;
+    // The chroma stride is shared by both planes and was loaded above.
+
+    for (int i = 0; i < iBlkHeight ; i++) {
+      for (int j = 0; j < iBlkWidth; j++) {
+        iPixel = j + i * (iLineStride);
+        if (iLog2denom >= 1) {
+          iPredTemp = ((pDst[iPixel] * iWoc + (1 << (iLog2denom - 1))) >> iLog2denom) + iOoc;
+
+          pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+        } else {
+          iPredTemp = pDst[iPixel] * iWoc + iOoc;
+
+          pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+
+        }
+      }
+
+    }
+
+
+  }
+}
+
+static void BiWeightPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, sMCRefMember* pTempMCRefMem,
+                                int32_t iRefIdx1, int32_t iRefIdx2, bool bWeightedBipredIdcIs1, int32_t iBlkWidth,
+                                int32_t iBlkHeight) { // weighted blend of pMCRefMem and pTempMCRefMem dst blocks, written into pMCRefMem
+  int32_t iWoc1 = 0, iOoc1 = 0, iWoc2 = 0, iOoc2 = 0;
+  int32_t iPredTemp, iLineStride;
+  int32_t iPixel = 0;
+  //luma
+  int32_t iLog2denom = pCurDqLayer->pPredWeightTable->uiLumaLog2WeightDenom;
+  if (bWeightedBipredIdcIs1) { // weights/offsets from the per-list tables: LIST_0[iRefIdx1], LIST_1[iRefIdx2]
+    iWoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iLumaWeight[iRefIdx1];
+    iOoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iLumaOffset[iRefIdx1];
+    iWoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iLumaWeight[iRefIdx2];
+    iOoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iLumaOffset[iRefIdx2];
+  } else { // otherwise: weight from iImplicitWeight, complementary to 64; both offsets stay 0
+    iWoc1 = pCurDqLayer->pPredWeightTable->iImplicitWeight[iRefIdx1][iRefIdx2];
+    iWoc2 = 64 - iWoc1; // NOTE(review): assumes the denominators read from the table suit weights summing to 64 - confirm where the table is filled
+  }
+  iLineStride = pMCRefMem->iDstLineLuma;
+
+  for (int i = 0; i < iBlkHeight; i++) {
+    for (int j = 0; j < iBlkWidth; j++) {
+      iPixel = j + i * (iLineStride); // the same offset indexes both buffers, i.e. the destination stride is used for both
+      iPredTemp = ((pMCRefMem->pDstY[iPixel] * iWoc1 + pTempMCRefMem->pDstY[iPixel] * iWoc2 + (1 << iLog2denom)) >>
+                   (iLog2denom + 1)) + ((iOoc1 + iOoc2 + 1) >> 1);
+      pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+    }
+  }
+
+  //UV
+  iBlkWidth = iBlkWidth >> 1;
+  iBlkHeight = iBlkHeight >> 1;
+  iLog2denom = pCurDqLayer->pPredWeightTable->uiChromaLog2WeightDenom;
+  iLineStride = pMCRefMem->iDstLineChroma;
+
+  uint8_t* pDst;
+  uint8_t* pTempDst;
+  for (int k = 0; k < 2; k++) { // k selects the chroma plane: 0 = U, 1 = V
+    // When bWeightedBipredIdcIs1 is false, the weights computed in the luma section above are reused unchanged.
+    if (bWeightedBipredIdcIs1) {
+      iWoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iChromaWeight[iRefIdx1][k];
+      iOoc1 = pCurDqLayer->pPredWeightTable->sPredList[LIST_0].iChromaOffset[iRefIdx1][k];
+      iWoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iChromaWeight[iRefIdx2][k];
+      iOoc2 = pCurDqLayer->pPredWeightTable->sPredList[LIST_1].iChromaOffset[iRefIdx2][k];
+    }
+    pDst  = k ? pMCRefMem->pDstV : pMCRefMem->pDstU;
+    pTempDst = k ? pTempMCRefMem->pDstV : pTempMCRefMem->pDstU;
+    // The chroma stride is shared by both planes and was loaded before the loop.
+
+    for (int i = 0; i < iBlkHeight; i++) {
+      for (int j = 0; j < iBlkWidth; j++) {
+        iPixel = j + i * (iLineStride);
+        iPredTemp = ((pDst[iPixel] * iWoc1 + pTempDst[iPixel] * iWoc2 + (1 << iLog2denom)) >> (iLog2denom + 1)) + ((
+                      iOoc1 + iOoc2 + 1) >> 1);
+        pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+      }
+    }
+  }
+}
+
+static void BiPrediction (PDqLayer pCurDqLayer, sMCRefMember* pMCRefMem, sMCRefMember* pTempMCRefMem, int32_t iBlkWidth,
+                          int32_t iBlkHeight) { // rounded average of the two dst blocks, written into pMCRefMem; pCurDqLayer is not used
+  int32_t iPredTemp, iLineStride;
+  int32_t iPixel = 0;
+  //luma
+  iLineStride = pMCRefMem->iDstLineLuma;
+
+  for (int i = 0; i < iBlkHeight; i++) {
+    for (int j = 0; j < iBlkWidth; j++) {
+      iPixel = j + i * (iLineStride); // the same offset indexes both buffers, i.e. the destination stride is used for both
+      iPredTemp = (pMCRefMem->pDstY[iPixel] + pTempMCRefMem->pDstY[iPixel] + 1) >> 1;
+      pMCRefMem->pDstY[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+    }
+  }
+
+  //UV
+  iBlkWidth = iBlkWidth >> 1;
+  iBlkHeight = iBlkHeight >> 1;
+  iLineStride = pMCRefMem->iDstLineChroma;
+
+  uint8_t* pDst;
+  uint8_t* pTempDst;
+  for (int k = 0; k < 2; k++) { // k selects the chroma plane: 0 = U, 1 = V
+    pDst = k ? pMCRefMem->pDstV : pMCRefMem->pDstU;
+    pTempDst = k ? pTempMCRefMem->pDstV : pTempMCRefMem->pDstU;
+    // The chroma stride is shared by both planes and was loaded before the loop.
+
+    for (int i = 0; i < iBlkHeight; i++) {
+      for (int j = 0; j < iBlkWidth; j++) {
+        iPixel = j + i * (iLineStride);
+        iPredTemp = (pDst[iPixel] + pTempDst[iPixel] + 1) >> 1;
+        pDst[iPixel] = WELS_CLIP3 (iPredTemp, 0, 255);
+      }
+    }
+  }
+}
+
+void GetInterPred (uint8_t* pPredY, uint8_t* pPredCb, uint8_t* pPredCr, PWelsDecoderContext pCtx) { // LIST_0 inter prediction of the current MB into pPredY/Cb/Cr
+  sMCRefMember pMCRefMem;
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  SMcFunc* pMCFunc = &pCtx->sMcFunc;
+
+  int32_t iMBXY = pCurDqLayer->iMbXyIndex;
+
+  int16_t iMVs[2] = {0};
+
+  uint32_t iMBType = pCurDqLayer->pMbType[iMBXY];
+  // Top-left position of the macroblock (16 per MB unit).
+  int32_t iMBOffsetX = pCurDqLayer->iMbX << 4;
+  int32_t iMBOffsetY = pCurDqLayer->iMbY << 4;
+  // Destination strides are taken from the picture being decoded.
+  int32_t iDstLineLuma   = pCtx->pDec->iLinesize[0];
+  int32_t iDstLineChroma = pCtx->pDec->iLinesize[1];
+
+  int32_t iBlk8X, iBlk8Y, iBlk4X, iBlk4Y, i, j, iIIdx, iJIdx;
+  // Picture size derived from the slice header's MB counts; BaseMC uses it for clamping.
+  pMCRefMem.iPicWidth = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbWidth << 4);
+  pMCRefMem.iPicHeight = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbHeight << 4);
+
+  pMCRefMem.pDstY = pPredY;
+  pMCRefMem.pDstU = pPredCb;
+  pMCRefMem.pDstV = pPredCr;
+
+  pMCRefMem.iDstLineLuma   = iDstLineLuma;
+  pMCRefMem.iDstLineChroma = iDstLineChroma;
+
+  int32_t iRefIndex = 0;
+  // Each partition: load MV, select reference (GetRefPic), motion-compensate (BaseMC), then weight if bUseWeightPredictionFlag is set.
+  switch (iMBType) {
+  case MB_TYPE_SKIP:
+  case MB_TYPE_16x16: // one 16x16 partition, MV / ref index at entry 0
+    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
+    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
+    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0);
+    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
+
+    if (pCurDqLayer->bUseWeightPredictionFlag) {
+      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
+      WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 16);
+    }
+    break;
+  case MB_TYPE_16x8: // two 16x8 partitions, MV / ref index at entries 0 and 8
+    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
+    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
+    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0);
+    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 8, iMVs);
+
+    if (pCurDqLayer->bUseWeightPredictionFlag) {
+      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
+      WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8);
+    }
+    // Lower half: destination moves down 8 luma rows / 4 chroma rows.
+    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][8][0];
+    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][8][1];
+    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 8, LIST_0);
+    pMCRefMem.pDstY = pPredY  + (iDstLineLuma << 3);
+    pMCRefMem.pDstU = pPredCb + (iDstLineChroma << 2);
+    pMCRefMem.pDstV = pPredCr + (iDstLineChroma << 2);
+    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + 8, pMCFunc, 16, 8, iMVs);
+
+    if (pCurDqLayer->bUseWeightPredictionFlag) {
+      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][8];
+      WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 16, 8);
+    }
+    break;
+  case MB_TYPE_8x16: // two 8x16 partitions, MV / ref index at entries 0 and 2
+    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][0][0];
+    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][0][1];
+    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 0, LIST_0);
+    BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 8, 16, iMVs);
+    if (pCurDqLayer->bUseWeightPredictionFlag) {
+      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][0];
+      WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16);
+    }
+    // Right half: destination moves right 8 luma columns / 4 chroma columns.
+    iMVs[0] = pCurDqLayer->pMv[0][iMBXY][2][0];
+    iMVs[1] = pCurDqLayer->pMv[0][iMBXY][2][1];
+    GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], 2, LIST_0);
+    pMCRefMem.pDstY = pPredY + 8;
+    pMCRefMem.pDstU = pPredCb + 4;
+    pMCRefMem.pDstV = pPredCr + 4;
+    BaseMC (&pMCRefMem, iMBOffsetX + 8, iMBOffsetY, pMCFunc, 8, 16, iMVs);
+
+    if (pCurDqLayer->bUseWeightPredictionFlag) {
+      iRefIndex = pCurDqLayer->pRefIndex[0][iMBXY][2];
+      WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 16);
+    }
+    break;
+  case MB_TYPE_8x8:
+  case MB_TYPE_8x8_REF0: { // four 8x8 blocks, each split further according to its sub-MB type
+    uint32_t iSubMBType;
+    int32_t iXOffset, iYOffset;
+    uint8_t* pDstY, *pDstU, *pDstV;
+    for (i = 0; i < 4; i++) {
+      iSubMBType = pCurDqLayer->pSubMbType[iMBXY][i];
+      iBlk8X = (i & 1) << 3;
+      iBlk8Y = (i >> 1) << 3;
+      iXOffset = iMBOffsetX + iBlk8X;
+      iYOffset = iMBOffsetY + iBlk8Y;
+      // iIIdx takes the values 0, 2, 8, 10 for i = 0..3; one reference is selected per 8x8 block.
+      iIIdx = ((i >> 1) << 3) + ((i & 1) << 1);
+      GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[0][iMBXY], iIIdx, LIST_0);
+      iRefIndex = pCurDqLayer->bUseWeightPredictionFlag ? pCurDqLayer->pRefIndex[0][iMBXY][iIIdx] : 0;
+      // Destination of this 8x8 block (4x4 in chroma) inside the macroblock.
+      pDstY = pPredY + iBlk8X + iBlk8Y * iDstLineLuma;
+      pDstU = pPredCb + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
+      pDstV = pPredCr + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
+      pMCRefMem.pDstY = pDstY;
+      pMCRefMem.pDstU = pDstU;
+      pMCRefMem.pDstV = pDstV;
+      switch (iSubMBType) {
+      case SUB_MB_TYPE_8x8: // one 8x8 partition
+        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
+        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
+        BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+        if (pCurDqLayer->bUseWeightPredictionFlag) {
+
+          WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 8);
+        }
+
+        break;
+      case SUB_MB_TYPE_8x4: // two 8x4 partitions; the second MV is at entry iIIdx + 4
+        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
+        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
+        BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+        if (pCurDqLayer->bUseWeightPredictionFlag) {
+
+          WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4);
+        }
+
+        // Lower 8x4: destination moves down 4 luma rows / 2 chroma rows.
+        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][0];
+        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 4][1];
+        pMCRefMem.pDstY += (iDstLineLuma << 2);
+        pMCRefMem.pDstU += (iDstLineChroma << 1);
+        pMCRefMem.pDstV += (iDstLineChroma << 1);
+        BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+        if (pCurDqLayer->bUseWeightPredictionFlag) {
+
+          WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 8, 4);
+        }
+
+        break;
+      case SUB_MB_TYPE_4x8: // two 4x8 partitions; the second MV is at entry iIIdx + 1
+        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx][0];
+        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx][1];
+        BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+        if (pCurDqLayer->bUseWeightPredictionFlag) {
+
+          WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8);
+        }
+
+        // Right 4x8: destination moves right 4 luma columns / 2 chroma columns.
+        iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][0];
+        iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + 1][1];
+        pMCRefMem.pDstY += 4;
+        pMCRefMem.pDstU += 2;
+        pMCRefMem.pDstV += 2;
+        BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+        if (pCurDqLayer->bUseWeightPredictionFlag) {
+
+          WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 8);
+        }
+
+        break;
+      case SUB_MB_TYPE_4x4: { // four 4x4 partitions
+        for (j = 0; j < 4; j++) {
+          int32_t iUVLineStride;
+          iJIdx = ((j >> 1) << 2) + (j & 1); // 0, 1, 4, 5 for j = 0..3
+
+          iBlk4X = (j & 1) << 2;
+          iBlk4Y = (j >> 1) << 2;
+          // Despite its name, iUVLineStride is the chroma sample offset of this 4x4 block within the 8x8 block.
+          iUVLineStride = (iBlk4X >> 1) + (iBlk4Y >> 1) * iDstLineChroma;
+          pMCRefMem.pDstY = pDstY + iBlk4X + iBlk4Y * iDstLineLuma;
+          pMCRefMem.pDstU = pDstU + iUVLineStride;
+          pMCRefMem.pDstV = pDstV + iUVLineStride;
+
+          iMVs[0] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[0][iMBXY][iIIdx + iJIdx][1];
+          BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+          if (pCurDqLayer->bUseWeightPredictionFlag) {
+
+            WeightPrediction (pCurDqLayer, &pMCRefMem, LIST_0, iRefIndex, 4, 4);
+          }
+
+        }
+      }
+      break;
+      default: // other sub-MB types: nothing is predicted for this 8x8 block
+        break;
+      }
+    }
+  }
+  break;
+  default: // other MB types: nothing is written
+    break;
+  }
+}
+
+void GetInterBPred (uint8_t* pPredYCbCr[3], uint8_t* pTempPredYCbCr[3], PWelsDecoderContext pCtx) {
+  sMCRefMember pMCRefMem;
+  sMCRefMember pTempMCRefMem;
+
+  PDqLayer pCurDqLayer = pCtx->pCurDqLayer;
+  SMcFunc* pMCFunc = &pCtx->sMcFunc;
+
+  int32_t iMBXY = pCurDqLayer->iMbXyIndex;
+
+  int16_t iMVs[2] = { 0 };
+
+  uint32_t iMBType = pCurDqLayer->pMbType[iMBXY];
+
+  int32_t iMBOffsetX = pCurDqLayer->iMbX << 4;
+  int32_t iMBOffsetY = pCurDqLayer->iMbY << 4;
+
+  int32_t iDstLineLuma = pCtx->pDec->iLinesize[0];
+  int32_t iDstLineChroma = pCtx->pDec->iLinesize[1];
+
+
+  pMCRefMem.iPicWidth = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbWidth << 4);
+  pMCRefMem.iPicHeight = (pCurDqLayer->sLayerInfo.sSliceInLayer.sSliceHeaderExt.sSliceHeader.iMbHeight << 4);
+
+  pMCRefMem.pDstY = pPredYCbCr[0];
+  pMCRefMem.pDstU = pPredYCbCr[1];
+  pMCRefMem.pDstV = pPredYCbCr[2];
+
+  pMCRefMem.iDstLineLuma = iDstLineLuma;
+  pMCRefMem.iDstLineChroma = iDstLineChroma;
+
+  pTempMCRefMem = pMCRefMem;
+  pTempMCRefMem.pDstY = pTempPredYCbCr[0];
+  pTempMCRefMem.pDstU = pTempPredYCbCr[1];
+  pTempMCRefMem.pDstV = pTempPredYCbCr[2];
+
+
+  int32_t iRefIndex1 = 0;
+  int32_t iRefIndex2 = 0;
+
+  bool bWeightedBipredIdcIs1 = pCurDqLayer->sLayerInfo.pPps->uiWeightedBipredIdc == 1;
+
+  if (IS_INTER_16x16 (iMBType)) {
+    if (IS_TYPE_L0 (iMBType) && IS_TYPE_L1 (iMBType)) {
+      iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][0][0];
+      iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][0][1];
+      GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], 0, LIST_0);
+      BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
+
+      iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][0][0];
+      iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][0][1];
+      GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], 0, LIST_1);
+      BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
+      iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][0];
+      iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][0];
+      if (pCurDqLayer->bUseWeightedBiPredIdc) {
+        BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 16);
+      } else {
+        BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem,  16, 16);
+      }
+    } else {
+      int32_t listIdx = (iMBType & MB_TYPE_P0L0) ? LIST_0 : LIST_1;
+      iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][0][0];
+      iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][0][1];
+      GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], 0, listIdx);
+      BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY, pMCFunc, 16, 16, iMVs);
+      if (bWeightedBipredIdcIs1) {
+        int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][0];
+        WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 16, 16);
+      }
+    }
+  } else if (IS_INTER_16x8 (iMBType)) {
+    for (int32_t i = 0; i < 2; ++i) {
+      int32_t iPartIdx = i << 3;
+      uint32_t listCount = 0;
+      int32_t lastListIdx = LIST_0;
+      for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+        if (IS_DIR (iMBType, i, listIdx)) {
+          lastListIdx = listIdx;
+          iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iPartIdx][1];
+          GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iPartIdx, listIdx);
+          if (i) {
+            pMCRefMem.pDstY += (iDstLineLuma << 3);
+            pMCRefMem.pDstU += (iDstLineChroma << 2);
+            pMCRefMem.pDstV += (iDstLineChroma << 2);
+          }
+          BaseMC (&pMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
+          if (++listCount == 2) {
+            iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][0];
+            iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iPartIdx][1];
+            GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iPartIdx, LIST_1);
+            if (i) {
+              pTempMCRefMem.pDstY += (iDstLineLuma << 3);
+              pTempMCRefMem.pDstU += (iDstLineChroma << 2);
+              pTempMCRefMem.pDstV += (iDstLineChroma << 2);
+            }
+            BaseMC (&pTempMCRefMem, iMBOffsetX, iMBOffsetY + iPartIdx, pMCFunc, 16, 8, iMVs);
+            if (pCurDqLayer->bUseWeightedBiPredIdc) {
+              iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iPartIdx];
+              iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iPartIdx];
+              BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 16, 8);
+            } else {
+              BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 16, 8);
+            }
+          }
+        }
+      }
+      if (listCount == 1) {
+        if (bWeightedBipredIdcIs1) {
+          int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][iPartIdx];
+          WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 16, 8);
+        }
+      }
+    }
+  } else if (IS_INTER_8x16 (iMBType)) {
+    for (int32_t i = 0; i < 2; ++i) {
+      uint32_t listCount = 0;
+      int32_t lastListIdx = LIST_0;
+      for (int32_t listIdx = LIST_0; listIdx < LIST_A; ++listIdx) {
+        if (IS_DIR (iMBType, i, listIdx)) {
+          lastListIdx = listIdx;
+          iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][0];
+          iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][i << 1][1];
+          GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], i << 1, listIdx);
+          if (i) {
+            pMCRefMem.pDstY += 8;
+            pMCRefMem.pDstU += 4;
+            pMCRefMem.pDstV += 4;
+          }
+          BaseMC (&pMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
+          if (++listCount == 2) {
+            iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][0];
+            iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][i << 1][1];
+            GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], i << 1, LIST_1);
+            if (i) {
+              pTempMCRefMem.pDstY += 8;
+              pTempMCRefMem.pDstU += 4;
+              pTempMCRefMem.pDstV += 4;
+            }
+            BaseMC (&pTempMCRefMem, iMBOffsetX + (i ? 8 : 0), iMBOffsetY, pMCFunc, 8, 16, iMVs);
+            if (pCurDqLayer->bUseWeightedBiPredIdc) {
+              iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][i << 1];
+              iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][i << 1];
+              BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 16);
+            } else {
+              BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 8, 16);
+            }
+          }
+        }
+      }
+      if (listCount == 1) {
+        if (bWeightedBipredIdcIs1) {
+          int32_t iRefIndex = pCurDqLayer->pRefIndex[lastListIdx][iMBXY][i << 1];
+          WeightPrediction (pCurDqLayer, &pMCRefMem, lastListIdx, iRefIndex, 8, 16);
+        }
+      }
+    }
+  } else if (IS_Inter_8x8 (iMBType)) {
+    int32_t iBlk8X, iBlk8Y, iBlk4X, iBlk4Y, iIIdx, iJIdx;
+    uint32_t iSubMBType;
+    int32_t iXOffset, iYOffset;
+    uint8_t* pDstY, *pDstU, *pDstV;
+    uint8_t* pDstY2, *pDstU2, *pDstV2;
+    for (int32_t i = 0; i < 4; i++) {
+      iSubMBType = pCurDqLayer->pSubMbType[iMBXY][i];
+      iBlk8X = (i & 1) << 3;
+      iBlk8Y = (i >> 1) << 3;
+      iXOffset = iMBOffsetX + iBlk8X;
+      iYOffset = iMBOffsetY + iBlk8Y;
+
+      iIIdx = ((i >> 1) << 3) + ((i & 1) << 1);
+
+      pDstY = pPredYCbCr[0] + iBlk8X + iBlk8Y * iDstLineLuma;
+      pDstU = pPredYCbCr[1] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
+      pDstV = pPredYCbCr[2] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
+      pMCRefMem.pDstY = pDstY;
+      pMCRefMem.pDstU = pDstU;
+      pMCRefMem.pDstV = pDstV;
+
+      pTempMCRefMem = pMCRefMem;
+      pDstY2 = pTempPredYCbCr[0] + iBlk8X + iBlk8Y * iDstLineLuma;
+      pDstU2 = pTempPredYCbCr[1] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
+      pDstV2 = pTempPredYCbCr[2] + (iBlk8X >> 1) + (iBlk8Y >> 1) * iDstLineChroma;
+
+      pTempMCRefMem.pDstY = pDstY2;
+      pTempMCRefMem.pDstU = pDstU2;
+      pTempMCRefMem.pDstV = pDstV2;
+
+      if ((IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType))) {
+        iRefIndex1 = pCurDqLayer->pRefIndex[LIST_0][iMBXY][iIIdx];
+        GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_0][iMBXY], iIIdx, LIST_0);
+
+        iRefIndex2 = pCurDqLayer->pRefIndex[LIST_1][iMBXY][iIIdx];
+        GetRefPic (&pTempMCRefMem, pCtx, pCurDqLayer->pRefIndex[LIST_1][iMBXY], iIIdx, LIST_1);
+      } else {
+        int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
+        iRefIndex1 = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
+        GetRefPic (&pMCRefMem, pCtx, pCurDqLayer->pRefIndex[listIdx][iMBXY], iIIdx, listIdx);
+      }
+
+      if (IS_SUB_8x8 (iSubMBType)) {
+        if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) {
+          iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+
+          iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
+          BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+
+          if (pCurDqLayer->bUseWeightedBiPredIdc) {
+            BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 8);
+          } else {
+            BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem,  8, 8);
+          }
+        } else {
+          int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
+          iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 8, iMVs);
+          if (bWeightedBipredIdcIs1) {
+            int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
+            WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 8);
+          }
+        }
+      } else if (IS_SUB_8x4 (iSubMBType)) {
+        if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_8x4
+          iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+          iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
+          BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+
+          if (pCurDqLayer->bUseWeightedBiPredIdc) {
+            BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4);
+          } else {
+            BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem,  8, 4);
+          }
+
+          pMCRefMem.pDstY += (iDstLineLuma << 2);
+          pMCRefMem.pDstU += (iDstLineChroma << 1);
+          pMCRefMem.pDstV += (iDstLineChroma << 1);
+          iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 4][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+
+          pTempMCRefMem.pDstY += (iDstLineLuma << 2);
+          pTempMCRefMem.pDstU += (iDstLineChroma << 1);
+          pTempMCRefMem.pDstV += (iDstLineChroma << 1);
+          iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 4][1];
+          BaseMC (&pTempMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+
+          if (pCurDqLayer->bUseWeightedBiPredIdc) {
+            BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 8, 4);
+          } else {
+            BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem,  8, 4);
+          }
+        } else { //B_L0_8x4 B_L1_8x4
+          int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
+          iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 8, 4, iMVs);
+          pMCRefMem.pDstY += (iDstLineLuma << 2);
+          pMCRefMem.pDstU += (iDstLineChroma << 1);
+          pMCRefMem.pDstV += (iDstLineChroma << 1);
+          iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][0];
+          iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 4][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset + 4, pMCFunc, 8, 4, iMVs);
+          if (bWeightedBipredIdcIs1) {
+            int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
+            WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 8, 4);
+          }
+        }
+      } else if (IS_SUB_4x8 (iSubMBType)) {
+        if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) { //B_Bi_4x8
+          iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+          iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx][1];
+          BaseMC (&pTempMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+
+          if (pCurDqLayer->bUseWeightedBiPredIdc) {
+            BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8);
+          } else {
+            BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem,  4, 8);
+          }
+
+          pMCRefMem.pDstY += 4;
+          pMCRefMem.pDstU += 2;
+          pMCRefMem.pDstV += 2;
+          iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + 1][1];
+          BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+
+          pTempMCRefMem.pDstY += 4;
+          pTempMCRefMem.pDstU += 2;
+          pTempMCRefMem.pDstV += 2;
+          iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][0];
+          iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + 1][1];
+          BaseMC (&pTempMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+
+          if (pCurDqLayer->bUseWeightedBiPredIdc) {
+            BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 8);
+          } else {
+            BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, 4, 8);
+          }
+        } else { //B_L0_4x8 B_L1_4x8
+          int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
+          iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][0];
+          iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx][1];
+          BaseMC (&pMCRefMem, iXOffset, iYOffset, pMCFunc, 4, 8, iMVs);
+          pMCRefMem.pDstY += 4;
+          pMCRefMem.pDstU += 2;
+          pMCRefMem.pDstV += 2;
+          iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][0];
+          iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + 1][1];
+          BaseMC (&pMCRefMem, iXOffset + 4, iYOffset, pMCFunc, 4, 8, iMVs);
+          if (bWeightedBipredIdcIs1) {
+            int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
+            WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 8);
+          }
+        }
+      } else if (IS_SUB_4x4 (iSubMBType)) {
+        if (IS_TYPE_L0 (iSubMBType) && IS_TYPE_L1 (iSubMBType)) {
+          for (int32_t j = 0; j < 4; j++) {
+            int32_t iUVLineStride;
+            iJIdx = ((j >> 1) << 2) + (j & 1);
+
+            iBlk4X = (j & 1) << 2;
+            iBlk4Y = (j >> 1) << 2;
+
+            iUVLineStride = (iBlk4X >> 1) + (iBlk4Y >> 1) * iDstLineChroma;
+            pMCRefMem.pDstY = pDstY + iBlk4X + iBlk4Y * iDstLineLuma;
+            pMCRefMem.pDstU = pDstU + iUVLineStride;
+            pMCRefMem.pDstV = pDstV + iUVLineStride;
+
+            iMVs[0] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][0];
+            iMVs[1] = pCurDqLayer->pMv[LIST_0][iMBXY][iIIdx + iJIdx][1];
+            BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+
+            pTempMCRefMem.pDstY = pDstY2 + iBlk8X + iBlk8Y * iDstLineLuma;
+            pTempMCRefMem.pDstU = pDstU2 + iUVLineStride;
+            pTempMCRefMem.pDstV = pDstV2 + iUVLineStride;;
+
+            iMVs[0] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][0];
+            iMVs[1] = pCurDqLayer->pMv[LIST_1][iMBXY][iIIdx + iJIdx][1];
+            BaseMC (&pTempMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+
+            if (pCurDqLayer->bUseWeightedBiPredIdc) {
+              BiWeightPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem, iRefIndex1, iRefIndex2, bWeightedBipredIdcIs1, 4, 4);
+            } else {
+              BiPrediction (pCurDqLayer, &pMCRefMem, &pTempMCRefMem,  4, 4);
+            }
+          }
+        } else {
+          int32_t listIdx = IS_TYPE_L0 (iSubMBType) ? LIST_0 : LIST_1;
+          int32_t iRefIndex = pCurDqLayer->pRefIndex[listIdx][iMBXY][iIIdx];
+          for (int32_t j = 0; j < 4; j++) {
+            int32_t iUVLineStride;
+            iJIdx = ((j >> 1) << 2) + (j & 1);
+
+            iBlk4X = (j & 1) << 2;
+            iBlk4Y = (j >> 1) << 2;
+
+            iUVLineStride = (iBlk4X >> 1) + (iBlk4Y >> 1) * iDstLineChroma;
+            pMCRefMem.pDstY = pDstY + iBlk4X + iBlk4Y * iDstLineLuma;
+            pMCRefMem.pDstU = pDstU + iUVLineStride;
+            pMCRefMem.pDstV = pDstV + iUVLineStride;
+
+            iMVs[0] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][0];
+            iMVs[1] = pCurDqLayer->pMv[listIdx][iMBXY][iIIdx + iJIdx][1];
+            BaseMC (&pMCRefMem, iXOffset + iBlk4X, iYOffset + iBlk4Y, pMCFunc, 4, 4, iMVs);
+            if (bWeightedBipredIdcIs1) {
+              WeightPrediction (pCurDqLayer, &pMCRefMem, listIdx, iRefIndex, 4, 4);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+// Runs pCtx->pIdctFourResAddPredFunc on pDqLayer->pPred[1] and pPred[2] for MB iMBXY; always returns ERR_NONE.
+int32_t RecChroma (int32_t iMBXY, PWelsDecoderContext pCtx, int16_t* pScoeffLevel, PDqLayer pDqLayer) {
+  const int32_t kiStrideC = pCtx->pCurDqLayer->pDec->iLinesize[1];
+  PIdctFourResAddPredFunc pfnIdctAdd = pCtx->pIdctFourResAddPredFunc;
+  const uint8_t kuiCbpChroma = pDqLayer->pCbp[iMBXY] >> 4; // bits 4 and up of the stored CBP value
+
+  // Only the values 1 and 2 trigger any work; for everything else the prediction is left as it is.
+  if (kuiCbpChroma != 1 && kuiCbpChroma != 2) {
+    return ERR_NONE;
+  }
+
+  for (int32_t iPlane = 0; iPlane < 2; ++iPlane) {
+    uint8_t* pPlanePred = pDqLayer->pPred[iPlane + 1];
+    int16_t* pPlaneCoeff = pScoeffLevel + 256 + (iPlane << 6); // plane data starts at 256, 64 entries apart
+    const int8_t* pPlaneNzc = pDqLayer->pNzc[iMBXY] + 16 + 2 * iPlane;
+    // each chroma plane is handled as four 4x4 blocks by a single IDCT call
+    pfnIdctAdd (pPlanePred, kiStrideC, pPlaneCoeff, pPlaneNzc);
+  }
+  return ERR_NONE;
+}
+
+} // namespace WelsDec
--- a/codec/decoder/plus/inc/welsDecoderExt.h
+++ b/codec/decoder/plus/inc/welsDecoderExt.h
@@ -1,127 +1,147 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- *  WelsDecoderExt.h
- *
- *  Abstract
- *      Cisco OpenH264 decoder extension utilization interface
- *
- *  History
- *      3/12/2009 Created
- *
- *
- *************************************************************************/
-#if !defined(WELS_PLUS_WELSDECODEREXT_H)
-#define WELS_PLUS_WELSDECODEREXT_H
-
-#include "codec_api.h"
-#include "codec_app_def.h"
-#include "decoder_context.h"
-#include "welsCodecTrace.h"
-#include "cpu.h"
-
-class ISVCDecoder;
-
-namespace WelsDec {
-
-//#define OUTPUT_BIT_STREAM  ////for test to output bitstream
-
-class CWelsDecoder : public ISVCDecoder {
- public:
-CWelsDecoder (void);
-virtual ~CWelsDecoder();
-
-virtual long EXTAPI Initialize (const SDecodingParam* pParam);
-virtual long EXTAPI Uninitialize();
-
-/***************************************************************************
-*   Description:
-*       Decompress one frame, and output I420 or RGB24(in the future) decoded stream and its length.
-*   Input parameters:
-*       Parameter       TYPE                   Description
-*       pSrc            unsigned char*         the h264 stream to decode
-*       srcLength       int                    the length of h264 steam
-*       pDst            unsigned char*         buffer pointer of decoded data
-*       pDstInfo        SBufferInfo&           information provided to API including width, height, SW/HW option, etc
-*
-*   return: if decode frame success return 0, otherwise corresponding error returned.
-***************************************************************************/
-virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char** ppDst,
-    int* pStride,
-    int& iWidth,
-    int& iHeight);
-
-virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char** ppDst,
-    SBufferInfo* pDstInfo);
-
-virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char** ppDst,
-    SBufferInfo* pDstInfo);
-virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    SParserBsInfo* pDstInfo);
-virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char* pDst,
-    int iDstStride,
-    int& iDstLen,
-    int& iWidth,
-    int& iHeight,
-    int& color_format);
-
-virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption);
-virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption);
-
- private:
-PWelsDecoderContext     m_pDecContext;
-welsCodecTrace*         m_pWelsTrace;
-
-int32_t InitDecoder (const SDecodingParam* pParam);
-void UninitDecoder (void);
-int32_t ResetDecoder();
-
-void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics);
-
-#ifdef OUTPUT_BIT_STREAM
-WelsFileHandle* m_pFBS;
-WelsFileHandle* m_pFBSSize;
-#endif//OUTPUT_BIT_STREAM
-
-};
-
-} // namespace WelsDec
-
-#endif // !defined(WELS_PLUS_WELSDECODEREXT_H)
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *  WelsDecoderExt.h
+ *
+ *  Abstract
+ *      Cisco OpenH264 decoder extension utilization interface
+ *
+ *  History
+ *      3/12/2009 Created
+ *
+ *
+ *************************************************************************/
+#if !defined(WELS_PLUS_WELSDECODEREXT_H)
+#define WELS_PLUS_WELSDECODEREXT_H
+
+#include "codec_api.h"
+#include "codec_app_def.h"
+#include "decoder_context.h"
+#include "welsCodecTrace.h"
+#include "cpu.h"
+
+class ISVCDecoder;
+
+namespace WelsDec {
+
+//#define OUTPUT_BIT_STREAM  ////for test to output bitstream
+
+class CWelsDecoder : public ISVCDecoder { // decoder object exposed through the ISVCDecoder interface
+ public:
+  CWelsDecoder (void);
+  virtual ~CWelsDecoder();
+
+  virtual long EXTAPI Initialize (const SDecodingParam* pParam);
+  virtual long EXTAPI Uninitialize();
+
+  /***************************************************************************
+  *   Description:
+  *       Decompress one frame, and output I420 or RGB24(in the future) decoded stream and its length.
+  *   Input parameters:
+  *       Parameter       TYPE                   Description
+  *       kpSrc           const unsigned char*   the h264 stream to decode
+  *       kiSrcLen        const int              the length of the h264 stream
+  *       ppDst           unsigned char**        buffer pointer of decoded data
+  *       pDstInfo        SBufferInfo*           information provided to API including width, height, SW/HW option, etc
+  *                                              (parameter of DecodeFrameNoDelay / DecodeFrame2 / FlushFrame below)
+  *   return: if decode frame success return 0, otherwise corresponding error returned.
+  ***************************************************************************/
+  virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* kpSrc,
+      const int kiSrcLen,
+      unsigned char** ppDst,
+      int* pStride,
+      int& iWidth,
+      int& iHeight);
+
+  virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* kpSrc,
+      const int kiSrcLen,
+      unsigned char** ppDst,
+      SBufferInfo* pDstInfo);
+
+  virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* kpSrc,
+      const int kiSrcLen,
+      unsigned char** ppDst,
+      SBufferInfo* pDstInfo);
+
+  virtual DECODING_STATE EXTAPI FlushFrame (unsigned char** ppDst,
+      SBufferInfo* pDstInfo); // NOTE(review): takes no input bitstream; presumably drains buffered pictures - confirm in the .cpp
+
+  virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* kpSrc,
+      const int kiSrcLen,
+      SParserBsInfo* pDstInfo);
+  virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* kpSrc,
+      const int kiSrcLen,
+      unsigned char* pDst,
+      int iDstStride,
+      int& iDstLen,
+      int& iWidth,
+      int& iHeight,
+      int& color_format);
+
+  virtual long EXTAPI SetOption (DECODER_OPTION eOptID, void* pOption);
+  virtual long EXTAPI GetOption (DECODER_OPTION eOptID, void* pOption);
+
+  typedef struct tagPictInfo { // one entry of m_sPictInfoList
+    SBufferInfo             sBufferInfo; // output description stored with the picture
+    int32_t                 iPOC; // presumably the picture order count of the picture - confirm
+    int32_t                 iFrameNum; // presumably the slice-header frame_num - confirm
+    bool                    bLastGOP; // NOTE(review): name suggests "belongs to the previous GOP" - confirm
+    unsigned char*          pData[3]; // three data pointers, presumably one per Y/U/V plane - confirm
+  } SPictInfo, *PPictInfo;
+
+ private:
+  PWelsDecoderContext     m_pDecContext; // decoder context pointer
+  welsCodecTrace*         m_pWelsTrace; // trace object pointer
+  SPictInfo               m_sPictInfoList[16]; // fixed-size table of 16 picture records
+  int32_t                 m_iPictInfoIndex; // NOTE(review): this and the counters below look like bookkeeping for
+  int32_t                 m_iMinPOC; //               m_sPictInfoList; their exact roles are set in the .cpp,
+  int32_t                 m_iNumOfPicts; //           which is not visible from this header - confirm there
+  int32_t                 m_iLastGOPRemainPicts;
+  int32_t                 m_LastWrittenPOC;
+  int32_t                 m_iLargestBufferedPicIndex;
+
+  int32_t InitDecoder (const SDecodingParam* pParam);
+  void UninitDecoder (void);
+  int32_t ResetDecoder();
+
+  void OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics);
+  DECODING_STATE ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo); // presumably display-order output - confirm
+
+#ifdef OUTPUT_BIT_STREAM
+  WelsFileHandle* m_pFBS; // file handles that exist only when OUTPUT_BIT_STREAM is defined
+  WelsFileHandle* m_pFBSSize;
+#endif//OUTPUT_BIT_STREAM
+
+};
+
+} // namespace WelsDec
+
+#endif // !defined(WELS_PLUS_WELSDECODEREXT_H)
--- a/codec/decoder/plus/src/welsDecoderExt.cpp
+++ b/codec/decoder/plus/src/welsDecoderExt.cpp
@@ -1,860 +1,1016 @@
-/*!
- * \copy
- *     Copyright (c)  2009-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- *  welsDecoderExt.cpp
- *
- *  Abstract
- *      Cisco OpenH264 decoder extension utilization
- *
- *  History
- *      3/12/2009 Created
- *
- *
- ************************************************************************/
-//#include <assert.h>
-#include "welsDecoderExt.h"
-#include "welsCodecTrace.h"
-#include "codec_def.h"
-#include "typedefs.h"
-#include "memory_align.h"
-#include "utils.h"
-#include "version.h"
-
-//#include "macros.h"
-#include "decoder.h"
-#include "decoder_core.h"
-#include "error_concealment.h"
-
-#include "measure_time.h"
-extern "C" {
-#include "decoder_core.h"
-#include "manage_dec_ref.h"
-}
-#include "error_code.h"
-#include "crt_util_safe_x.h" // Safe CRT routines like util for cross platforms
-#include <time.h>
-#if defined(_WIN32) /*&& defined(_DEBUG)*/
-
-#include <windows.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <sys/types.h>
-#else
-#include <sys/time.h>
-#endif
-
-namespace WelsDec {
-
-//////////////////////////////////////////////////////////////////////
-// Construction/Destruction
-//////////////////////////////////////////////////////////////////////
-
-/***************************************************************************
-*   Description:
-*       class CWelsDecoder constructor function, do initialization  and
-*       alloc memory required
-*
-*   Input parameters: none
-*
-*   return: none
-***************************************************************************/
-CWelsDecoder::CWelsDecoder (void)
-  : m_pDecContext (NULL),
-    m_pWelsTrace (NULL) {
-#ifdef OUTPUT_BIT_STREAM
-  char chFileName[1024] = { 0 };  //for .264
-  int iBufUsed = 0;
-  int iBufLeft = 1023;
-  int iCurUsed;
-
-  char chFileNameSize[1024] = { 0 }; //for .len
-  int iBufUsedSize = 0;
-  int iBufLeftSize = 1023;
-  int iCurUsedSize;
-#endif//OUTPUT_BIT_STREAM
-
-
-  m_pWelsTrace = new welsCodecTrace();
-  if (m_pWelsTrace != NULL) {
-    m_pWelsTrace->SetCodecInstance (this);
-    m_pWelsTrace->SetTraceLevel (WELS_LOG_ERROR);
-
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::CWelsDecoder() entry");
-  }
-
-#ifdef OUTPUT_BIT_STREAM
-  SWelsTime sCurTime;
-
-  WelsGetTimeOfDay (&sCurTime);
-
-  iCurUsed     = WelsSnprintf (chFileName,  iBufLeft,  "bs_0x%p_", (void*)this);
-  iCurUsedSize = WelsSnprintf (chFileNameSize, iBufLeftSize, "size_0x%p_", (void*)this);
-
-  iBufUsed += iCurUsed;
-  iBufLeft -= iCurUsed;
-  if (iBufLeft > 0) {
-    iCurUsed = WelsStrftime (&chFileName[iBufUsed], iBufLeft, "%y%m%d%H%M%S", &sCurTime);
-    iBufUsed += iCurUsed;
-    iBufLeft -= iCurUsed;
-  }
-
-  iBufUsedSize += iCurUsedSize;
-  iBufLeftSize -= iCurUsedSize;
-  if (iBufLeftSize > 0) {
-    iCurUsedSize = WelsStrftime (&chFileNameSize[iBufUsedSize], iBufLeftSize, "%y%m%d%H%M%S", &sCurTime);
-    iBufUsedSize += iCurUsedSize;
-    iBufLeftSize -= iCurUsedSize;
-  }
-
-  if (iBufLeft > 0) {
-    iCurUsed = WelsSnprintf (&chFileName[iBufUsed], iBufLeft, ".%03.3u.264", WelsGetMillisecond (&sCurTime));
-    iBufUsed += iCurUsed;
-    iBufLeft -= iCurUsed;
-  }
-
-  if (iBufLeftSize > 0) {
-    iCurUsedSize = WelsSnprintf (&chFileNameSize[iBufUsedSize], iBufLeftSize, ".%03.3u.len",
-                                 WelsGetMillisecond (&sCurTime));
-    iBufUsedSize += iCurUsedSize;
-    iBufLeftSize -= iCurUsedSize;
-  }
-
-
-  m_pFBS = WelsFopen (chFileName, "wb");
-  m_pFBSSize = WelsFopen (chFileNameSize, "wb");
-#endif//OUTPUT_BIT_STREAM
-
-}
-
-/***************************************************************************
-*   Description:
-*       class CWelsDecoder destructor function, destroy allocced memory
-*
-*   Input parameters: none
-*
-*   return: none
-***************************************************************************/
-CWelsDecoder::~CWelsDecoder() {
-  if (m_pWelsTrace != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::~CWelsDecoder()");
-  }
-
-  UninitDecoder();
-
-#ifdef OUTPUT_BIT_STREAM
-  if (m_pFBS) {
-    WelsFclose (m_pFBS);
-    m_pFBS = NULL;
-  }
-  if (m_pFBSSize) {
-    WelsFclose (m_pFBSSize);
-    m_pFBSSize = NULL;
-  }
-#endif//OUTPUT_BIT_STREAM
-
-  if (m_pWelsTrace != NULL) {
-    delete m_pWelsTrace;
-    m_pWelsTrace = NULL;
-  }
-}
-
-long CWelsDecoder::Initialize (const SDecodingParam* pParam) {
-  int iRet = ERR_NONE;
-  if (m_pWelsTrace == NULL) {
-    return cmMallocMemeError;
-  }
-
-  if (pParam == NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsDecoder::Initialize(), invalid input argument.");
-    return cmInitParaError;
-  }
-
-  // H.264 decoder initialization,including memory allocation,then open it ready to decode
-  iRet = InitDecoder (pParam);
-  if (iRet)
-    return iRet;
-
-  return cmResultSuccess;
-}
-
-long CWelsDecoder::Uninitialize() {
-  UninitDecoder();
-
-  return ERR_NONE;
-}
-
-void CWelsDecoder::UninitDecoder (void) {
-  if (NULL == m_pDecContext)
-    return;
-
-  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoder(), openh264 codec version = %s.",
-           VERSION_NUMBER);
-
-  WelsEndDecoder (m_pDecContext);
-
-  if (m_pDecContext->pMemAlign != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-             "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
-             m_pDecContext->pMemAlign->WelsGetMemoryUsage());
-    delete m_pDecContext->pMemAlign;
-    m_pDecContext->pMemAlign = NULL;
-  }
-
-  if (NULL != m_pDecContext) {
-    WelsFree (m_pDecContext, "m_pDecContext");
-
-    m_pDecContext = NULL;
-  }
-
-}
-
-// the return value of this function is not suitable, it need report failure info to upper layer.
-int32_t CWelsDecoder::InitDecoder (const SDecodingParam* pParam) {
-
-  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-           "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
-           VERSION_NUMBER, (int32_t)pParam->bParseOnly);
-
-  //reset decoder context
-  if (m_pDecContext) //free
-    UninitDecoder();
-  m_pDecContext = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
-  if (NULL == m_pDecContext)
-    return cmMallocMemeError;
-  int32_t iCacheLineSize = 16;   // on chip cache line size in byte
-  m_pDecContext->pMemAlign = new CMemoryAlign (iCacheLineSize);
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pMemAlign), UninitDecoder())
-
-  //fill in default value into context
-  WelsDecoderDefaults (m_pDecContext, &m_pWelsTrace->m_sLogCtx);
-
-  //check param and update decoder context
-  m_pDecContext->pParam = (SDecodingParam*) m_pDecContext->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
-                          "SDecodingParam");
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pParam), UninitDecoder());
-  int32_t iRet = DecoderConfigParam (m_pDecContext, pParam);
-  WELS_VERIFY_RETURN_IFNEQ (iRet, cmResultSuccess);
-
-  //init decoder
-  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (m_pDecContext, &m_pWelsTrace->m_sLogCtx),
-                              UninitDecoder())
-
-  return cmResultSuccess;
-}
-
-int32_t CWelsDecoder::ResetDecoder() {
-  // TBC: need to be modified when context and trace point are null
-  if (m_pDecContext != NULL && m_pWelsTrace != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
-             m_pDecContext->iErrorCode);
-    SDecodingParam sPrevParam;
-    memcpy (&sPrevParam, m_pDecContext->pParam, sizeof (SDecodingParam));
-
-    WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoder (&sPrevParam), UninitDecoder());
-  } else if (m_pWelsTrace != NULL) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
-  }
-  return ERR_INFO_UNINIT;
-}
-
-/*
- * Set Option
- */
-long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) {
-  int iVal = 0;
-
-  if (m_pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
-      eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
-    return dsInitialOptExpected;
-  if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
-    if (pOption == NULL)
-      return cmInitParaError;
-
-    iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
-
-    m_pDecContext->bEndOfStreamFlag = iVal ? true : false;
-
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
-    if (pOption == NULL)
-      return cmInitParaError;
-
-    iVal = * ((int*)pOption); // int value for error concealment idc
-    iVal = WELS_CLIP3 (iVal, (int32_t) ERROR_CON_DISABLE, (int32_t) ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
-    if ((m_pDecContext->pParam->bParseOnly) && (iVal != (int32_t) ERROR_CON_DISABLE)) {
-      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-               "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
-      return cmInitParaError;
-    }
-
-    m_pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC) iVal;
-    InitErrorCon (m_pDecContext);
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-             "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
-
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
-    if (m_pWelsTrace) {
-      uint32_t level = * ((uint32_t*)pOption);
-      m_pWelsTrace->SetTraceLevel (level);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
-    if (m_pWelsTrace) {
-      WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
-      m_pWelsTrace->SetTraceCallback (callback);
-      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-               "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
-               callback);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
-    if (m_pWelsTrace) {
-      void* ctx = * ((void**)pOption);
-      m_pWelsTrace->SetTraceCallbackContext (ctx);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
-             "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
-    return cmInitParaError;
-  } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
-    if (pOption) {
-      m_pDecContext->sDecoderStatistics.iStatisticsLogInterval = (* ((unsigned int*)pOption));
-      return cmResultSuccess;
-    }
-  } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
-             "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
-    return cmInitParaError;
-  }
-  return cmInitParaError;
-}
-
-/*
- *  Get Option
- */
-long CWelsDecoder::GetOption (DECODER_OPTION eOptID, void* pOption) {
-  int iVal = 0;
-
-  if (m_pDecContext == NULL)
-    return cmInitExpected;
-
-  if (pOption == NULL)
-    return cmInitParaError;
-
-  if (DECODER_OPTION_END_OF_STREAM == eOptID) {
-    iVal = m_pDecContext->bEndOfStreamFlag;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  }
-#ifdef LONG_TERM_REF
-  else if (DECODER_OPTION_IDR_PIC_ID == eOptID) {
-    iVal = m_pDecContext->uiCurIdrPicId;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_FRAME_NUM == eOptID) {
-    iVal = m_pDecContext->iFrameNum;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_LTR_MARKING_FLAG == eOptID) {
-    iVal = m_pDecContext->bCurAuContainLtrMarkSeFlag;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_LTR_MARKED_FRAME_NUM == eOptID) {
-    iVal = m_pDecContext->iFrameNumOfAuMarkedLtr;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  }
-#endif
-  else if (DECODER_OPTION_VCL_NAL == eOptID) { //feedback whether or not have VCL NAL in current AU
-    iVal = m_pDecContext->iFeedbackVclNalInAu;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_TEMPORAL_ID == eOptID) { //if have VCL NAL in current AU, then feedback the temporal ID
-    iVal = m_pDecContext->iFeedbackTidInAu;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_IS_REF_PIC == eOptID) {
-    iVal = m_pDecContext->iFeedbackNalRefIdc;
-    if (iVal > 0)
-      iVal = 1;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_ERROR_CON_IDC == eOptID) {
-    iVal = (int) m_pDecContext->pParam->eEcActiveIdc;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_GET_STATISTICS == eOptID) { // get decoder statistics info for real time debugging
-    SDecoderStatistics* pDecoderStatistics = (static_cast<SDecoderStatistics*> (pOption));
-
-    memcpy (pDecoderStatistics, &m_pDecContext->sDecoderStatistics, sizeof (SDecoderStatistics));
-
-    if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount != 0) { //not original status
-      pDecoderStatistics->fAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
-          (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount);
-      pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
-          (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount + m_pDecContext->sDecoderStatistics.uiFreezingIDRNum +
-           m_pDecContext->sDecoderStatistics.uiFreezingNonIDRNum);
-    }
-    return cmResultSuccess;
-  } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
-    if (pOption) {
-      iVal = m_pDecContext->sDecoderStatistics.iStatisticsLogInterval;
-      * ((unsigned int*)pOption) = iVal;
-      return cmResultSuccess;
-    }
-  } else if (DECODER_OPTION_GET_SAR_INFO == eOptID) { //get decoder SAR info in VUI
-    PVuiSarInfo pVuiSarInfo = (static_cast<PVuiSarInfo> (pOption));
-    memset (pVuiSarInfo, 0, sizeof (SVuiSarInfo));
-    if (!m_pDecContext->pSps) {
-      return cmInitExpected;
-    } else {
-      pVuiSarInfo->uiSarWidth = m_pDecContext->pSps->sVui.uiSarWidth;
-      pVuiSarInfo->uiSarHeight = m_pDecContext->pSps->sVui.uiSarHeight;
-      pVuiSarInfo->bOverscanAppropriateFlag = m_pDecContext->pSps->sVui.bOverscanAppropriateFlag;
-      return cmResultSuccess;
-    }
-  } else if (DECODER_OPTION_PROFILE == eOptID) {
-    if (!m_pDecContext->pSps) {
-      return cmInitExpected;
-    }
-    iVal = (int) m_pDecContext->pSps->uiProfileIdc;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  } else if (DECODER_OPTION_LEVEL == eOptID) {
-    if (!m_pDecContext->pSps) {
-      return cmInitExpected;
-    }
-    iVal = (int) m_pDecContext->pSps->uiLevelIdc;
-    * ((int*)pOption) = iVal;
-    return cmResultSuccess;
-  }
-
-  return cmInitParaError;
-}
-
-DECODING_STATE CWelsDecoder::DecodeFrameNoDelay (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char** ppDst,
-    SBufferInfo* pDstInfo) {
-  int iRet;
-  //SBufferInfo sTmpBufferInfo;
-  //unsigned char* ppTmpDst[3] = {NULL, NULL, NULL};
-
-  iRet = (int) DecodeFrame2 (kpSrc, kiSrcLen, ppDst, pDstInfo);
-  //memcpy (&sTmpBufferInfo, pDstInfo, sizeof (SBufferInfo));
-  //ppTmpDst[0] = ppDst[0];
-  //ppTmpDst[1] = ppDst[1];
-  //ppTmpDst[2] = ppDst[2];
-  iRet |= DecodeFrame2 (NULL, 0, ppDst, pDstInfo);
-  //if ((pDstInfo->iBufferStatus == 0) && (sTmpBufferInfo.iBufferStatus == 1)) {
-  //memcpy (pDstInfo, &sTmpBufferInfo, sizeof (SBufferInfo));
-  //ppDst[0] = ppTmpDst[0];
-  //ppDst[1] = ppTmpDst[1];
-  //ppDst[2] = ppTmpDst[2];
-  //}
-
-  return (DECODING_STATE) iRet;
-}
-
-DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char** ppDst,
-    SBufferInfo* pDstInfo) {
-  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
-    if (m_pWelsTrace != NULL) {
-      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeFrame2 without Initialize.\n");
-    }
-    return dsInitialOptExpected;
-  }
-
-  if (m_pDecContext->pParam->bParseOnly) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be false for this API calling! \n");
-    m_pDecContext->iErrorCode |= dsInvalidArgument;
-    return dsInvalidArgument;
-  }
-  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
-    if (ResetDecoder())
-      return dsOutOfMemory;
-
-    return dsErrorFree;
-  }
-  if (kiSrcLen > 0 && kpSrc != NULL) {
-#ifdef OUTPUT_BIT_STREAM
-    if (m_pFBS) {
-      WelsFwrite (kpSrc, sizeof (unsigned char), kiSrcLen, m_pFBS);
-      WelsFflush (m_pFBS);
-    }
-    if (m_pFBSSize) {
-      WelsFwrite (&kiSrcLen, sizeof (int), 1, m_pFBSSize);
-      WelsFflush (m_pFBSSize);
-    }
-#endif//OUTPUT_BIT_STREAM
-    m_pDecContext->bEndOfStreamFlag = false;
-  } else {
-    //For application MODE, the error detection should be added for safe.
-    //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
-    m_pDecContext->bEndOfStreamFlag = true;
-    m_pDecContext->bInstantDecFlag = true;
-  }
-
-  int64_t iStart, iEnd;
-  iStart = WelsTime();
-  ppDst[0] = ppDst[1] = ppDst[2] = NULL;
-  m_pDecContext->iErrorCode             = dsErrorFree; //initialize at the starting of AU decoding.
-  m_pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
-  unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp;
-  memset (pDstInfo, 0, sizeof (SBufferInfo));
-  pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp;
-#ifdef LONG_TERM_REF
-  m_pDecContext->bReferenceLostAtT0Flag       = false; //initialize for LTR
-  m_pDecContext->bCurAuContainLtrMarkSeFlag = false;
-  m_pDecContext->iFrameNumOfAuMarkedLtr      = 0;
-  m_pDecContext->iFrameNum                       = -1; //initialize
-#endif
-
-  m_pDecContext->iFeedbackTidInAu             = -1; //initialize
-  m_pDecContext->iFeedbackNalRefIdc           = -1; //initialize
-  if (pDstInfo) {
-    pDstInfo->uiOutYuvTimeStamp = 0;
-    m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
-  } else {
-    m_pDecContext->uiTimeStamp = 0;
-  }
-  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, ppDst,
-                pDstInfo, NULL); //iErrorCode has been modified in this function
-  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
-  if (m_pDecContext->iErrorCode) {
-    EWelsNalUnitType eNalType =
-      NAL_UNIT_UNSPEC_0; //for NBR, IDR frames are expected to decode as followed if error decoding an IDR currently
-
-    eNalType = m_pDecContext->sCurNalHead.eNalUnitType;
-
-    if (m_pDecContext->iErrorCode & dsOutOfMemory) {
-      if (ResetDecoder())
-        return dsOutOfMemory;
-
-      return dsErrorFree;
-    }
-    //for AVC bitstream (excluding AVC with temporal scalability, including TP), as long as error occur, SHOULD notify upper layer key frame loss.
-    if ((IS_PARAM_SETS_NALS (eNalType) || NAL_UNIT_CODED_SLICE_IDR == eNalType) ||
-        (VIDEO_BITSTREAM_AVC == m_pDecContext->eVideoType)) {
-      if (m_pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
-#ifdef LONG_TERM_REF
-        m_pDecContext->bParamSetsLostFlag = true;
-#else
-        m_pDecContext->bReferenceLostAtT0Flag = true;
-#endif
-      }
-    }
-
-    if (m_pDecContext->bPrintFrameErrorTraceFlag) {
-      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n",
-               m_pDecContext->iErrorCode);
-      m_pDecContext->bPrintFrameErrorTraceFlag = false;
-    } else {
-      m_pDecContext->iIgnoredErrorInfoPacketCount ++;
-      if (m_pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
-        WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, "continuous error reached INT_MAX! Restart as 0.");
-        m_pDecContext->iIgnoredErrorInfoPacketCount = 0;
-      }
-    }
-    if ((m_pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
-      //TODO after dec status updated
-      m_pDecContext->iErrorCode |= dsDataErrorConcealed;
-
-      m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
-      if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-        ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
-        m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
-      }
-      int32_t iMbConcealedNum = m_pDecContext->iMbEcedNum + m_pDecContext->iMbEcedPropNum;
-      m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->iMbNum == 0 ?
-          (m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : ((
-                m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + ((
-                      iMbConcealedNum * 100) / m_pDecContext->iMbNum));
-      m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->iMbNum == 0 ?
-          (m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : ((
-                m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + ((
-                      m_pDecContext->iMbEcedPropNum * 100) / m_pDecContext->iMbNum));
-      m_pDecContext->sDecoderStatistics.uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
-      m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 0 :
-          m_pDecContext->sDecoderStatistics.uiAvgEcRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum;
-      m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 0 :
-          m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum;
-    }
-    iEnd = WelsTime();
-    m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
-
-    OutputStatisticsLog (m_pDecContext->sDecoderStatistics);
-
-    return (DECODING_STATE) m_pDecContext->iErrorCode;
-  }
-  // else Error free, the current codec works well
-
-  if (pDstInfo->iBufferStatus == 1) {
-
-    m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
-    if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-      ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
-      m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
-    }
-
-    OutputStatisticsLog (m_pDecContext->sDecoderStatistics);
-  }
-  iEnd = WelsTime();
-  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
-
-
-
-  return dsErrorFree;
-}
-
-void CWelsDecoder::OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics) {
-  if ((sDecoderStatistics.uiDecodedFrameCount > 0) && (sDecoderStatistics.iStatisticsLogInterval > 0)
-      && ((sDecoderStatistics.uiDecodedFrameCount % sDecoderStatistics.iStatisticsLogInterval) == 0)) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
-             "DecoderStatistics: uiWidth=%d, uiHeight=%d, fAverageFrameSpeedInMs=%.1f, fActualAverageFrameSpeedInMs=%.1f, \
-              uiDecodedFrameCount=%d, uiResolutionChangeTimes=%d, uiIDRCorrectNum=%d, \
-              uiAvgEcRatio=%d, uiAvgEcPropRatio=%d, uiEcIDRNum=%d, uiEcFrameNum=%d, \
-              uiIDRLostNum=%d, uiFreezingIDRNum=%d, uiFreezingNonIDRNum=%d, iAvgLumaQp=%d, \
-              iSpsReportErrorNum=%d, iSubSpsReportErrorNum=%d, iPpsReportErrorNum=%d, iSpsNoExistNalNum=%d, iSubSpsNoExistNalNum=%d, iPpsNoExistNalNum=%d, \
-              uiProfile=%d, uiLevel=%d, \
-              iCurrentActiveSpsId=%d, iCurrentActivePpsId=%d,",
-             sDecoderStatistics.uiWidth,
-             sDecoderStatistics.uiHeight,
-             sDecoderStatistics.fAverageFrameSpeedInMs,
-             sDecoderStatistics.fActualAverageFrameSpeedInMs,
-
-             sDecoderStatistics.uiDecodedFrameCount,
-             sDecoderStatistics.uiResolutionChangeTimes,
-             sDecoderStatistics.uiIDRCorrectNum,
-
-             sDecoderStatistics.uiAvgEcRatio,
-             sDecoderStatistics.uiAvgEcPropRatio,
-             sDecoderStatistics.uiEcIDRNum,
-             sDecoderStatistics.uiEcFrameNum,
-
-             sDecoderStatistics.uiIDRLostNum,
-             sDecoderStatistics.uiFreezingIDRNum,
-             sDecoderStatistics.uiFreezingNonIDRNum,
-             sDecoderStatistics.iAvgLumaQp,
-
-             sDecoderStatistics.iSpsReportErrorNum,
-             sDecoderStatistics.iSubSpsReportErrorNum,
-             sDecoderStatistics.iPpsReportErrorNum,
-             sDecoderStatistics.iSpsNoExistNalNum,
-             sDecoderStatistics.iSubSpsNoExistNalNum,
-             sDecoderStatistics.iPpsNoExistNalNum,
-
-             sDecoderStatistics.uiProfile,
-             sDecoderStatistics.uiLevel,
-
-             sDecoderStatistics.iCurrentActiveSpsId,
-             sDecoderStatistics.iCurrentActivePpsId);
-  }
-}
-
-DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    SParserBsInfo* pDstInfo) {
-  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
-    if (m_pWelsTrace != NULL) {
-      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeParser without Initialize.\n");
-    }
-    return dsInitialOptExpected;
-  }
-
-  if (!m_pDecContext->pParam->bParseOnly) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be true for this API calling! \n");
-    m_pDecContext->iErrorCode |= dsInvalidArgument;
-    return dsInvalidArgument;
-  }
-  int64_t iEnd, iStart = WelsTime();
-  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
-    if (ResetDecoder())
-      return dsOutOfMemory;
-
-    return dsErrorFree;
-  }
-  if (kiSrcLen > 0 && kpSrc != NULL) {
-#ifdef OUTPUT_BITSTREAM
-    if (m_pFBS) {
-      WelsFwrite (kpSrc, sizeof (unsigned char), kiSrcLen, m_pFBS);
-      WelsFflush (m_pFBS);
-    }
-#endif//OUTPUT_BIT_STREAM
-    m_pDecContext->bEndOfStreamFlag = false;
-  } else {
-    //For application MODE, the error detection should be added for safe.
-    //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
-    m_pDecContext->bEndOfStreamFlag = true;
-    m_pDecContext->bInstantDecFlag = true;
-  }
-
-  m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
-  m_pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
-  m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
-  if (!m_pDecContext->bFramePending) { //frame complete
-    m_pDecContext->pParserBsInfo->iNalNum = 0;
-    memset (m_pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
-  }
-  pDstInfo->iNalNum = 0;
-  pDstInfo->iSpsWidthInPixel = pDstInfo->iSpsHeightInPixel = 0;
-  if (pDstInfo) {
-    m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
-    pDstInfo->uiOutBsTimeStamp = 0;
-  } else {
-    m_pDecContext->uiTimeStamp = 0;
-  }
-  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
-  if (m_pDecContext->iErrorCode & dsOutOfMemory) {
-    if (ResetDecoder())
-      return dsOutOfMemory;
-    return dsErrorFree;
-  }
-
-  if (!m_pDecContext->bFramePending && m_pDecContext->pParserBsInfo->iNalNum) {
-    memcpy (pDstInfo, m_pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
-
-    if (m_pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
-      m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
-      if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
-        ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
-        m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
-      }
-    }
-  }
-
-  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
-
-  if (m_pDecContext->iErrorCode && m_pDecContext->bPrintFrameErrorTraceFlag) {
-    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", m_pDecContext->iErrorCode);
-    m_pDecContext->bPrintFrameErrorTraceFlag = false;
-  }
-  iEnd = WelsTime();
-  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
-
-  return (DECODING_STATE) m_pDecContext->iErrorCode;
-}
-
-DECODING_STATE CWelsDecoder::DecodeFrame (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char** ppDst,
-    int* pStride,
-    int& iWidth,
-    int& iHeight) {
-  DECODING_STATE eDecState = dsErrorFree;
-  SBufferInfo    DstInfo;
-
-  memset (&DstInfo, 0, sizeof (SBufferInfo));
-  DstInfo.UsrData.sSystemBuffer.iStride[0] = pStride[0];
-  DstInfo.UsrData.sSystemBuffer.iStride[1] = pStride[1];
-  DstInfo.UsrData.sSystemBuffer.iWidth = iWidth;
-  DstInfo.UsrData.sSystemBuffer.iHeight = iHeight;
-
-  eDecState = DecodeFrame2 (kpSrc, kiSrcLen, ppDst, &DstInfo);
-  if (eDecState == dsErrorFree) {
-    pStride[0] = DstInfo.UsrData.sSystemBuffer.iStride[0];
-    pStride[1] = DstInfo.UsrData.sSystemBuffer.iStride[1];
-    iWidth     = DstInfo.UsrData.sSystemBuffer.iWidth;
-    iHeight    = DstInfo.UsrData.sSystemBuffer.iHeight;
-  }
-
-  return eDecState;
-}
-
-DECODING_STATE CWelsDecoder::DecodeFrameEx (const unsigned char* kpSrc,
-    const int kiSrcLen,
-    unsigned char* pDst,
-    int iDstStride,
-    int& iDstLen,
-    int& iWidth,
-    int& iHeight,
-    int& iColorFormat) {
-  DECODING_STATE state = dsErrorFree;
-
-  return state;
-}
-
-
-} // namespace WelsDec
-
-
-using namespace WelsDec;
-/*
-*       WelsGetDecoderCapability
-*       @return: DecCapability information
-*/
-int WelsGetDecoderCapability (SDecoderCapability* pDecCapability) {
-  memset (pDecCapability, 0, sizeof (SDecoderCapability));
-  pDecCapability->iProfileIdc = 66; //Baseline
-  pDecCapability->iProfileIop = 0xE0; //11100000b
-  pDecCapability->iLevelIdc = 32; //level_idc = 3.2
-  pDecCapability->iMaxMbps = 216000; //from level_idc = 3.2
-  pDecCapability->iMaxFs = 5120; //from level_idc = 3.2
-  pDecCapability->iMaxCpb = 20000; //from level_idc = 3.2
-  pDecCapability->iMaxDpb = 20480; //from level_idc = 3.2
-  pDecCapability->iMaxBr = 20000; //from level_idc = 3.2
-  pDecCapability->bRedPicCap = 0; //not support redundant pic
-
-  return ERR_NONE;
-}
-/* WINAPI is indeed in prefix due to sync to application layer callings!! */
-
-/*
-*   WelsCreateDecoder
-*   @return:    success in return 0, otherwise failed.
-*/
-long WelsCreateDecoder (ISVCDecoder** ppDecoder) {
-
-  if (NULL == ppDecoder) {
-    return ERR_INVALID_PARAMETERS;
-  }
-
-  *ppDecoder = new CWelsDecoder();
-
-  if (NULL == *ppDecoder) {
-    return ERR_MALLOC_FAILED;
-  }
-
-  return ERR_NONE;
-}
-
-/*
-*   WelsDestroyDecoder
-*/
-void WelsDestroyDecoder (ISVCDecoder* pDecoder) {
-  if (NULL != pDecoder) {
-    delete (CWelsDecoder*)pDecoder;
-  }
-}
+/*!
+ * \copy
+ *     Copyright (c)  2009-2013, Cisco Systems
+ *     All rights reserved.
+ *
+ *     Redistribution and use in source and binary forms, with or without
+ *     modification, are permitted provided that the following conditions
+ *     are met:
+ *
+ *        * Redistributions of source code must retain the above copyright
+ *          notice, this list of conditions and the following disclaimer.
+ *
+ *        * Redistributions in binary form must reproduce the above copyright
+ *          notice, this list of conditions and the following disclaimer in
+ *          the documentation and/or other materials provided with the
+ *          distribution.
+ *
+ *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ *     POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ *  welsDecoderExt.cpp
+ *
+ *  Abstract
+ *      Cisco OpenH264 decoder extension utilization
+ *
+ *  History
+ *      3/12/2009 Created
+ *
+ *
+ ************************************************************************/
+//#include <assert.h>
+#include "welsDecoderExt.h"
+#include "welsCodecTrace.h"
+#include "codec_def.h"
+#include "typedefs.h"
+#include "memory_align.h"
+#include "utils.h"
+#include "version.h"
+
+//#include "macros.h"
+#include "decoder.h"
+#include "decoder_core.h"
+#include "error_concealment.h"
+
+#include "measure_time.h"
+extern "C" {
+#include "decoder_core.h"
+#include "manage_dec_ref.h"
+}
+#include "error_code.h"
+#include "crt_util_safe_x.h" // Safe CRT routines like util for cross platforms
+#include <time.h>
+#if defined(_WIN32) /*&& defined(_DEBUG)*/
+
+#include <windows.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <sys/types.h>
+#else
+#include <sys/time.h>
+#endif
+
+#define _PICTURE_REORDERING_ 1 // NOTE(review): underscore + uppercase names are reserved for the implementation in C/C++
+
+static int32_t sIMinInt32 = -0x7FFFFFFF; // initial value for the POC members below; NOTE(review): this is INT32_MIN + 1, not INT32_MIN
+
+namespace WelsDec {
+
+//////////////////////////////////////////////////////////////////////
+// Construction/Destruction
+//////////////////////////////////////////////////////////////////////
+
+/***************************************************************************
+*   Description:
+*       CWelsDecoder constructor: sets members to their initial values and
+*       creates the trace object; the decoder context itself stays NULL here.
+*       With OUTPUT_BIT_STREAM defined it also opens two dump files.
+*   Input parameters: none
+*
+*   return: none
+***************************************************************************/
+CWelsDecoder::CWelsDecoder (void)
+  : m_pDecContext (NULL),
+    m_pWelsTrace (NULL),
+    m_iPictInfoIndex (0),
+    m_iMinPOC (sIMinInt32),
+    m_iNumOfPicts (0),
+    m_iLastGOPRemainPicts (0),
+    m_LastWrittenPOC (sIMinInt32),
+    m_iLargestBufferedPicIndex (0) {
+#ifdef OUTPUT_BIT_STREAM
+  char chFileName[1024] = { 0 };  // name buffer for the .264 dump file
+  int iBufUsed = 0;
+  int iBufLeft = 1023; // NOTE(review): presumably 1024 minus one byte kept for the terminator - confirm
+  int iCurUsed;
+
+  char chFileNameSize[1024] = { 0 }; // name buffer for the .len dump file
+  int iBufUsedSize = 0;
+  int iBufLeftSize = 1023;
+  int iCurUsedSize;
+#endif//OUTPUT_BIT_STREAM
+
+  // Create the trace object and start it at the WELS_LOG_ERROR level.
+  m_pWelsTrace = new welsCodecTrace();
+  if (m_pWelsTrace != NULL) {
+    m_pWelsTrace->SetCodecInstance (this);
+    m_pWelsTrace->SetTraceLevel (WELS_LOG_ERROR);
+
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::CWelsDecoder() entry");
+  }
+
+  for (int32_t i = 0; i < 16; ++i) { // NOTE(review): 16 presumably equals the size of m_sPictInfoList - confirm in the class header
+    m_sPictInfoList[i].bLastGOP = false;
+    m_sPictInfoList[i].iPOC = sIMinInt32;
+  }
+
+#ifdef OUTPUT_BIT_STREAM
+  SWelsTime sCurTime;
+
+  WelsGetTimeOfDay (&sCurTime);
+  // Build both dump-file names: prefix with the this pointer, then a timestamp, then milliseconds and extension.
+  iCurUsed = WelsSnprintf (chFileName, iBufLeft, "bs_0x%p_", (void*)this);
+  iCurUsedSize = WelsSnprintf (chFileNameSize, iBufLeftSize, "size_0x%p_", (void*)this);
+
+  iBufUsed += iCurUsed; // NOTE(review): iCurUsed is used unchecked - confirm WelsSnprintf cannot return a negative value
+  iBufLeft -= iCurUsed;
+  if (iBufLeft > 0) {
+    iCurUsed = WelsStrftime (&chFileName[iBufUsed], iBufLeft, "%y%m%d%H%M%S", &sCurTime);
+    iBufUsed += iCurUsed;
+    iBufLeft -= iCurUsed;
+  }
+
+  iBufUsedSize += iCurUsedSize;
+  iBufLeftSize -= iCurUsedSize;
+  if (iBufLeftSize > 0) {
+    iCurUsedSize = WelsStrftime (&chFileNameSize[iBufUsedSize], iBufLeftSize, "%y%m%d%H%M%S", &sCurTime);
+    iBufUsedSize += iCurUsedSize;
+    iBufLeftSize -= iCurUsedSize;
+  }
+
+  if (iBufLeft > 0) {
+    iCurUsed = WelsSnprintf (&chFileName[iBufUsed], iBufLeft, ".%03.3u.264", WelsGetMillisecond (&sCurTime));
+    iBufUsed += iCurUsed;
+    iBufLeft -= iCurUsed;
+  }
+
+  if (iBufLeftSize > 0) {
+    iCurUsedSize = WelsSnprintf (&chFileNameSize[iBufUsedSize], iBufLeftSize, ".%03.3u.len",
+                                 WelsGetMillisecond (&sCurTime));
+    iBufUsedSize += iCurUsedSize;
+    iBufLeftSize -= iCurUsedSize;
+  }
+
+  // Open both dump files in "wb" mode; the returned handles are stored without being checked here.
+  m_pFBS = WelsFopen (chFileName, "wb");
+  m_pFBSSize = WelsFopen (chFileNameSize, "wb");
+#endif//OUTPUT_BIT_STREAM
+}
+
+/***************************************************************************
+*   Description:
+*       class CWelsDecoder destructor: tears down the decoding context,
+*       closes the optional bit-stream dump files and releases the trace object
+*
+*   Input parameters: none
+*
+*   return: none
+***************************************************************************/
+CWelsDecoder::~CWelsDecoder() {
+  // Log first: the trace object is the last thing to be destroyed.
+  if (m_pWelsTrace)
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::~CWelsDecoder()");
+
+  UninitDecoder();
+
+#ifdef OUTPUT_BIT_STREAM
+  if (m_pFBS != NULL) {
+    WelsFclose (m_pFBS);
+    m_pFBS = NULL;
+  }
+  if (m_pFBSSize != NULL) {
+    WelsFclose (m_pFBSSize);
+    m_pFBSSize = NULL;
+  }
+#endif//OUTPUT_BIT_STREAM
+
+  // Deleting a NULL pointer is a no-op, so no guard is needed.
+  delete m_pWelsTrace;
+  m_pWelsTrace = NULL;
+}
+
+// Public initialization entry point: validates its preconditions and delegates to InitDecoder().
+// Returns cmMallocMemeError when the trace object is missing, cmInitParaError for a NULL pParam,
+// the non-zero InitDecoder() result on failure, and cmResultSuccess otherwise.
+long CWelsDecoder::Initialize (const SDecodingParam* pParam) {
+  if (NULL == m_pWelsTrace)
+    return cmMallocMemeError;
+
+  if (NULL == pParam) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "CWelsDecoder::Initialize(), invalid input argument.");
+    return cmInitParaError;
+  }
+
+  // H.264 decoder initialization, including memory allocation; leaves the decoder ready to decode.
+  const int iInitResult = InitDecoder (pParam);
+  if (iInitResult != 0) {
+    return iInitResult;
+  }
+  return cmResultSuccess;
+}
+
+// Public teardown entry point: releases the decoding context (if any) and always reports ERR_NONE.
+long CWelsDecoder::Uninitialize() {
+  UninitDecoder();
+  return ERR_NONE;
+}
+
+// Releases the decoding context: ends the decoder, deletes the context's memory manager,
+// frees the context itself and resets m_pDecContext to NULL. Does nothing without a context.
+void CWelsDecoder::UninitDecoder (void) {
+  if (m_pDecContext == NULL) {
+    return;
+  }
+
+  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "CWelsDecoder::UninitDecoder(), openh264 codec version = %s.",
+           VERSION_NUMBER);
+
+  WelsEndDecoder (m_pDecContext);
+
+  if (NULL != m_pDecContext->pMemAlign) {
+    // Report what is still accounted for before the memory manager goes away.
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+             "CWelsDecoder::UninitDecoder(), verify memory usage (%d bytes) after free..",
+             m_pDecContext->pMemAlign->WelsGetMemoryUsage());
+    delete m_pDecContext->pMemAlign;
+    m_pDecContext->pMemAlign = NULL;
+  }
+
+  // The context is known to be non-NULL here (checked on entry).
+  WelsFree (m_pDecContext, "m_pDecContext");
+  m_pDecContext = NULL;
+}
+
+// Builds a fresh decoding context from pParam (which must not be NULL; both callers in this file
+// guarantee that). An existing context is released first.
+//
+// Returns cmResultSuccess on success, cmMallocMemeError when an allocation or WelsInitDecoder() fails,
+// or the error reported by DecoderConfigParam().
+// On every failure path the partially built context is released again, so m_pDecContext is NULL
+// afterwards and later decode calls report "not initialized" instead of running on a half-built context.
+//
+// TODO: the return value of this function is not ideal; it needs to report more precise failure
+// information to the upper layer.
+int32_t CWelsDecoder::InitDecoder (const SDecodingParam* pParam) {
+
+  WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+           "CWelsDecoder::init_decoder(), openh264 codec version = %s, ParseOnly = %d",
+           VERSION_NUMBER, (int32_t)pParam->bParseOnly);
+
+  //reset decoder context
+  if (m_pDecContext) //free
+    UninitDecoder();
+  m_pDecContext = (PWelsDecoderContext)WelsMallocz (sizeof (SWelsDecoderContext), "m_pDecContext");
+  if (NULL == m_pDecContext)
+    return cmMallocMemeError;
+  int32_t iCacheLineSize = 16;   // on chip cache line size in byte
+  m_pDecContext->pMemAlign = new CMemoryAlign (iCacheLineSize);
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pMemAlign), UninitDecoder())
+
+  //fill in default value into context
+  WelsDecoderDefaults (m_pDecContext, &m_pWelsTrace->m_sLogCtx);
+
+  //check param and update decoder context
+  m_pDecContext->pParam = (SDecodingParam*)m_pDecContext->pMemAlign->WelsMallocz (sizeof (SDecodingParam),
+                          "SDecodingParam");
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, (NULL == m_pDecContext->pParam), UninitDecoder());
+  int32_t iRet = DecoderConfigParam (m_pDecContext, pParam);
+  if (iRet != cmResultSuccess) {
+    // Release the context like the other failure paths do; otherwise a non-NULL but
+    // uninitialized context would be accepted by DecodeFrame2()/DecodeParser().
+    UninitDecoder();
+    return iRet;
+  }
+
+  //init decoder
+  WELS_VERIFY_RETURN_PROC_IF (cmMallocMemeError, WelsInitDecoder (m_pDecContext, &m_pWelsTrace->m_sLogCtx),
+                              UninitDecoder())
+
+  return cmResultSuccess;
+}
+
+// Re-creates the decoding context using a copy of the parameters stored in the current context.
+// If InitDecoder() fails, UninitDecoder() is run and cmInitParaError is returned.
+// NOTE(review): every other path -- including a successful re-initialization -- returns
+// ERR_INFO_UNINIT, and the callers in this file treat any non-zero result as failure
+// (they return dsOutOfMemory). Confirm this is intended before changing it.
+int32_t CWelsDecoder::ResetDecoder() {
+  // TBC: need to be modified when context and trace point are null
+  if (m_pDecContext != NULL && m_pWelsTrace != NULL) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "ResetDecoder(), context error code is %d",
+             m_pDecContext->iErrorCode);
+    // Copy the parameters out first: InitDecoder() releases the current context (and with it pParam).
+    SDecodingParam sPrevParam;
+    memcpy (&sPrevParam, m_pDecContext->pParam, sizeof (SDecodingParam));
+
+    WELS_VERIFY_RETURN_PROC_IF (cmInitParaError, InitDecoder (&sPrevParam), UninitDecoder());
+  } else if (m_pWelsTrace != NULL) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "ResetDecoder() failed as decoder context null");
+  }
+  return ERR_INFO_UNINIT;
+}
+
+/*
+ * Set Option
+ *
+ * eOptID   option to change
+ * pOption  pointer to the new value; its pointee type depends on eOptID
+ *          (int for END_OF_STREAM / ERROR_CON_IDC, uint32_t for TRACE_LEVEL,
+ *          WelsTraceCallback for TRACE_CALLBACK, void* for TRACE_CALLBACK_CONTEXT,
+ *          unsigned int for STATISTICS_LOG_INTERVAL)
+ *
+ * Only the three trace options may be set before the decoder is initialized; any other
+ * option returns dsInitialOptExpected in that state.
+ * Returns cmResultSuccess on success and cmInitParaError for a NULL pOption, a get-only
+ * option, an unknown option, or an error-concealment mode that is not allowed in parse-only mode.
+ */
+long CWelsDecoder::SetOption (DECODER_OPTION eOptID, void* pOption) {
+  int iVal = 0;
+
+  if (m_pDecContext == NULL && eOptID != DECODER_OPTION_TRACE_LEVEL &&
+      eOptID != DECODER_OPTION_TRACE_CALLBACK && eOptID != DECODER_OPTION_TRACE_CALLBACK_CONTEXT)
+    return dsInitialOptExpected;
+  if (eOptID == DECODER_OPTION_END_OF_STREAM) { // Indicate bit-stream of the final frame to be decoded
+    if (pOption == NULL)
+      return cmInitParaError;
+
+    iVal = * ((int*)pOption); // boolean value for whether enabled End Of Stream flag
+
+    m_pDecContext->bEndOfStreamFlag = iVal ? true : false;
+
+    return cmResultSuccess;
+  } else if (eOptID == DECODER_OPTION_ERROR_CON_IDC) { // Indicate error concealment status
+    if (pOption == NULL)
+      return cmInitParaError;
+
+    iVal = * ((int*)pOption); // int value for error concealment idc
+    iVal = WELS_CLIP3 (iVal, (int32_t)ERROR_CON_DISABLE, (int32_t)ERROR_CON_SLICE_MV_COPY_CROSS_IDR_FREEZE_RES_CHANGE);
+    if ((m_pDecContext->pParam->bParseOnly) && (iVal != (int32_t)ERROR_CON_DISABLE)) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+               "CWelsDecoder::SetOption for ERROR_CON_IDC = %d not allowd for parse only!.", iVal);
+      return cmInitParaError;
+    }
+
+    m_pDecContext->pParam->eEcActiveIdc = (ERROR_CON_IDC)iVal;
+    InitErrorCon (m_pDecContext);
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+             "CWelsDecoder::SetOption for ERROR_CON_IDC = %d.", iVal);
+
+    return cmResultSuccess;
+  } else if (eOptID == DECODER_OPTION_TRACE_LEVEL) {
+    // The value is read through pOption, so NULL is rejected instead of being dereferenced.
+    if (pOption == NULL)
+      return cmInitParaError;
+    if (m_pWelsTrace) {
+      uint32_t level = * ((uint32_t*)pOption);
+      m_pWelsTrace->SetTraceLevel (level);
+    }
+    return cmResultSuccess;
+  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK) {
+    if (pOption == NULL)
+      return cmInitParaError;
+    if (m_pWelsTrace) {
+      WelsTraceCallback callback = * ((WelsTraceCallback*)pOption);
+      m_pWelsTrace->SetTraceCallback (callback);
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+               "CWelsDecoder::SetOption():DECODER_OPTION_TRACE_CALLBACK callback = %p.",
+               callback);
+    }
+    return cmResultSuccess;
+  } else if (eOptID == DECODER_OPTION_TRACE_CALLBACK_CONTEXT) {
+    // pOption points at the context pointer; the context itself may be NULL, pOption may not.
+    if (pOption == NULL)
+      return cmInitParaError;
+    if (m_pWelsTrace) {
+      void* ctx = * ((void**)pOption);
+      m_pWelsTrace->SetTraceCallbackContext (ctx);
+    }
+    return cmResultSuccess;
+  } else if (eOptID == DECODER_OPTION_GET_STATISTICS) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+             "CWelsDecoder::SetOption():DECODER_OPTION_GET_STATISTICS: this option is get-only!");
+    return cmInitParaError;
+  } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
+    if (pOption) {
+      m_pDecContext->sDecoderStatistics.iStatisticsLogInterval = (* ((unsigned int*)pOption));
+      return cmResultSuccess;
+    }
+    // NULL pOption falls through to the generic cmInitParaError below.
+  } else if (eOptID == DECODER_OPTION_GET_SAR_INFO) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING,
+             "CWelsDecoder::SetOption():DECODER_OPTION_GET_SAR_INFO: this option is get-only!");
+    return cmInitParaError;
+  }
+  return cmInitParaError;
+}
+
+/*
+ *  Get Option
+ *
+ *  eOptID   option to query
+ *  pOption  receives the value; its pointee type depends on eOptID (int for most options,
+ *           SDecoderStatistics for GET_STATISTICS, SVuiSarInfo for GET_SAR_INFO,
+ *           unsigned int for STATISTICS_LOG_INTERVAL)
+ *
+ *  Returns cmInitExpected when the decoder is not initialized or when an SPS-dependent option
+ *  is queried before an SPS is active, cmInitParaError for a NULL pOption or an unknown option,
+ *  and cmResultSuccess otherwise.
+ */
+long CWelsDecoder::GetOption (DECODER_OPTION eOptID, void* pOption) {
+  int iVal = 0;
+
+  if (m_pDecContext == NULL)
+    return cmInitExpected;
+
+  if (pOption == NULL)
+    return cmInitParaError;
+
+  if (DECODER_OPTION_END_OF_STREAM == eOptID) {
+    iVal = m_pDecContext->bEndOfStreamFlag;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  }
+#ifdef LONG_TERM_REF
+  else if (DECODER_OPTION_IDR_PIC_ID == eOptID) {
+    iVal = m_pDecContext->uiCurIdrPicId;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_FRAME_NUM == eOptID) {
+    iVal = m_pDecContext->iFrameNum;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_LTR_MARKING_FLAG == eOptID) {
+    iVal = m_pDecContext->bCurAuContainLtrMarkSeFlag;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_LTR_MARKED_FRAME_NUM == eOptID) {
+    iVal = m_pDecContext->iFrameNumOfAuMarkedLtr;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  }
+#endif
+  else if (DECODER_OPTION_VCL_NAL == eOptID) { //feedback whether or not have VCL NAL in current AU
+    iVal = m_pDecContext->iFeedbackVclNalInAu;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_TEMPORAL_ID == eOptID) { //if have VCL NAL in current AU, then feedback the temporal ID
+    iVal = m_pDecContext->iFeedbackTidInAu;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_IS_REF_PIC == eOptID) {
+    // Collapse any positive nal_ref_idc feedback to 1; non-positive values are passed through.
+    iVal = m_pDecContext->iFeedbackNalRefIdc;
+    if (iVal > 0)
+      iVal = 1;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_ERROR_CON_IDC == eOptID) {
+    iVal = (int)m_pDecContext->pParam->eEcActiveIdc;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_GET_STATISTICS == eOptID) { // get decoder statistics info for real time debugging
+    SDecoderStatistics* pDecoderStatistics = (static_cast<SDecoderStatistics*> (pOption));
+
+    memcpy (pDecoderStatistics, &m_pDecContext->sDecoderStatistics, sizeof (SDecoderStatistics));
+
+    // The two averages are computed on the fly in the caller's copy only.
+    if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount != 0) { //not original status
+      pDecoderStatistics->fAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
+          (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount);
+      pDecoderStatistics->fActualAverageFrameSpeedInMs = (float) (m_pDecContext->dDecTime) /
+          (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount + m_pDecContext->sDecoderStatistics.uiFreezingIDRNum +
+           m_pDecContext->sDecoderStatistics.uiFreezingNonIDRNum);
+    }
+    return cmResultSuccess;
+  } else if (eOptID == DECODER_OPTION_STATISTICS_LOG_INTERVAL) {
+    // pOption was already validated at the top of the function.
+    iVal = m_pDecContext->sDecoderStatistics.iStatisticsLogInterval;
+    * ((unsigned int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_GET_SAR_INFO == eOptID) { //get decoder SAR info in VUI
+    PVuiSarInfo pVuiSarInfo = (static_cast<PVuiSarInfo> (pOption));
+    memset (pVuiSarInfo, 0, sizeof (SVuiSarInfo));
+    if (!m_pDecContext->pSps) {
+      return cmInitExpected;
+    } else {
+      pVuiSarInfo->uiSarWidth = m_pDecContext->pSps->sVui.uiSarWidth;
+      pVuiSarInfo->uiSarHeight = m_pDecContext->pSps->sVui.uiSarHeight;
+      pVuiSarInfo->bOverscanAppropriateFlag = m_pDecContext->pSps->sVui.bOverscanAppropriateFlag;
+      return cmResultSuccess;
+    }
+  } else if (DECODER_OPTION_PROFILE == eOptID) {
+    if (!m_pDecContext->pSps) {
+      return cmInitExpected;
+    }
+    iVal = (int)m_pDecContext->pSps->uiProfileIdc;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_LEVEL == eOptID) {
+    if (!m_pDecContext->pSps) {
+      return cmInitExpected;
+    }
+    iVal = (int)m_pDecContext->pSps->uiLevelIdc;
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  } else if (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER == eOptID) {
+    // Pictures are only buffered for reordering under the same condition ReorderPicturesInDisplay()
+    // uses (profile_idc != 66 and bEntropyCodingModeFlag set). In every other case, including a
+    // missing SPS/PPS, report 0 instead of leaving the caller's variable unwritten.
+    iVal = 0;
+    if (m_pDecContext->pSps && m_pDecContext->pPps && m_pDecContext->pSps->uiProfileIdc != 66
+        && m_pDecContext->pPps->bEntropyCodingModeFlag) {
+      iVal = m_iNumOfPicts > 0 ? m_iNumOfPicts : 0;
+    }
+    * ((int*)pOption) = iVal;
+    return cmResultSuccess;
+  }
+
+  return cmInitParaError;
+}
+
+// Decodes the given input and then immediately calls DecodeFrame2() a second time with a
+// NULL / zero-length input, which DecodeFrame2() handles by setting bEndOfStreamFlag and
+// bInstantDecFlag. The result codes of both calls are OR-ed together and returned.
+// NOTE(review): DecodeFrame2() clears ppDst[0..2] and *pDstInfo on entry, so whatever the first
+// call produced is overwritten by the second call; the commented-out code below looks like an
+// earlier attempt to keep the first result -- confirm whether it is still needed.
+DECODING_STATE CWelsDecoder::DecodeFrameNoDelay (const unsigned char* kpSrc,
+    const int kiSrcLen,
+    unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
+  int iRet;
+  //SBufferInfo sTmpBufferInfo;
+  //unsigned char* ppTmpDst[3] = {NULL, NULL, NULL};
+  iRet = (int)DecodeFrame2 (kpSrc, kiSrcLen, ppDst, pDstInfo);
+  //memcpy (&sTmpBufferInfo, pDstInfo, sizeof (SBufferInfo));
+  //ppTmpDst[0] = ppDst[0];
+  //ppTmpDst[1] = ppDst[1];
+  //ppTmpDst[2] = ppDst[2];
+  iRet |= DecodeFrame2 (NULL, 0, ppDst, pDstInfo);
+  //if ((pDstInfo->iBufferStatus == 0) && (sTmpBufferInfo.iBufferStatus == 1)) {
+  //memcpy (pDstInfo, &sTmpBufferInfo, sizeof (SBufferInfo));
+  //ppDst[0] = ppTmpDst[0];
+  //ppDst[1] = ppTmpDst[1];
+  //ppDst[2] = ppTmpDst[2];
+  //}
+  return (DECODING_STATE)iRet;
+}
+
+// Decodes one chunk of bit-stream and reports at most one output picture.
+//
+// kpSrc / kiSrcLen  input bit-stream; NULL or a length <= 0 is treated as end of stream and
+//                   switches the decoder to instant-decode mode for this call
+// ppDst             array of three plane pointers; cleared on entry and filled on output
+// pDstInfo          in: uiInBsTimeStamp is preserved and handed to the decoder;
+//                   out: picture description, iBufferStatus == 1 when a picture is returned
+//
+// Returns dsInitialOptExpected when the decoder is not initialized, dsInvalidArgument when the
+// decoder is in parse-only mode or ppDst / pDstInfo is NULL, dsOutOfMemory or dsErrorFree after a
+// decoder reset (depending on ResetDecoder()'s result), the accumulated iErrorCode when decoding
+// failed, and dsErrorFree otherwise.
+DECODING_STATE CWelsDecoder::DecodeFrame2 (const unsigned char* kpSrc,
+    const int kiSrcLen,
+    unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
+  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+    if (m_pWelsTrace != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeFrame2 without Initialize.\n");
+    }
+    return dsInitialOptExpected;
+  }
+
+  if (m_pDecContext->pParam->bParseOnly) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be false for this API calling! \n");
+    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    return dsInvalidArgument;
+  }
+  // Both output arguments are written unconditionally below, so reject NULL up front
+  // instead of dereferencing it.
+  if (ppDst == NULL || pDstInfo == NULL) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "DecodeFrame2 called with NULL output argument.\n");
+    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    return dsInvalidArgument;
+  }
+  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
+    if (ResetDecoder())
+      return dsOutOfMemory;
+
+    return dsErrorFree;
+  }
+  if (kiSrcLen > 0 && kpSrc != NULL) {
+#ifdef OUTPUT_BIT_STREAM
+    if (m_pFBS) {
+      WelsFwrite (kpSrc, sizeof (unsigned char), kiSrcLen, m_pFBS);
+      WelsFflush (m_pFBS);
+    }
+    if (m_pFBSSize) {
+      WelsFwrite (&kiSrcLen, sizeof (int), 1, m_pFBSSize);
+      WelsFflush (m_pFBSSize);
+    }
+#endif//OUTPUT_BIT_STREAM
+    m_pDecContext->bEndOfStreamFlag = false;
+  } else {
+    //For application MODE, the error detection should be added for safe.
+    //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
+    m_pDecContext->bEndOfStreamFlag = true;
+    m_pDecContext->bInstantDecFlag = true;
+  }
+
+  int64_t iStart, iEnd;
+  iStart = WelsTime();
+
+  ppDst[0] = ppDst[1] = ppDst[2] = NULL;
+  m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+  m_pDecContext->iFeedbackVclNalInAu = FEEDBACK_UNKNOWN_NAL; //initialize
+  // Clear the output description but keep the caller-supplied input time stamp.
+  unsigned long long uiInBsTimeStamp = pDstInfo->uiInBsTimeStamp;
+  memset (pDstInfo, 0, sizeof (SBufferInfo));
+  pDstInfo->uiInBsTimeStamp = uiInBsTimeStamp;
+#ifdef LONG_TERM_REF
+  m_pDecContext->bReferenceLostAtT0Flag = false; //initialize for LTR
+  m_pDecContext->bCurAuContainLtrMarkSeFlag = false;
+  m_pDecContext->iFrameNumOfAuMarkedLtr = 0;
+  m_pDecContext->iFrameNum = -1; //initialize
+#endif
+
+  m_pDecContext->iFeedbackTidInAu = -1; //initialize
+  m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
+  pDstInfo->uiOutYuvTimeStamp = 0;
+  m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, ppDst,
+                pDstInfo, NULL); //iErrorCode has been modified in this function
+  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
+  if (m_pDecContext->iErrorCode) {
+    EWelsNalUnitType eNalType =
+      NAL_UNIT_UNSPEC_0; //for NBR, IDR frames are expected to decode as followed if error decoding an IDR currently
+
+    eNalType = m_pDecContext->sCurNalHead.eNalUnitType;
+
+    if (m_pDecContext->iErrorCode & dsOutOfMemory) {
+      if (ResetDecoder())
+        return dsOutOfMemory;
+
+      return dsErrorFree;
+    }
+    //for AVC bitstream (excluding AVC with temporal scalability, including TP), as long as error occur, SHOULD notify upper layer key frame loss.
+    if ((IS_PARAM_SETS_NALS (eNalType) || NAL_UNIT_CODED_SLICE_IDR == eNalType) ||
+        (VIDEO_BITSTREAM_AVC == m_pDecContext->eVideoType)) {
+      if (m_pDecContext->pParam->eEcActiveIdc == ERROR_CON_DISABLE) {
+#ifdef LONG_TERM_REF
+        m_pDecContext->bParamSetsLostFlag = true;
+#else
+        m_pDecContext->bReferenceLostAtT0Flag = true;
+#endif
+      }
+    }
+
+    // Only the first error after the flag was armed is logged; later ones are just counted.
+    if (m_pDecContext->bPrintFrameErrorTraceFlag) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n",
+               m_pDecContext->iErrorCode);
+      m_pDecContext->bPrintFrameErrorTraceFlag = false;
+    } else {
+      m_pDecContext->iIgnoredErrorInfoPacketCount++;
+      if (m_pDecContext->iIgnoredErrorInfoPacketCount == INT_MAX) {
+        WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_WARNING, "continuous error reached INT_MAX! Restart as 0.");
+        m_pDecContext->iIgnoredErrorInfoPacketCount = 0;
+      }
+    }
+    if ((m_pDecContext->pParam->eEcActiveIdc != ERROR_CON_DISABLE) && (pDstInfo->iBufferStatus == 1)) {
+      //TODO after dec status updated
+      m_pDecContext->iErrorCode |= dsDataErrorConcealed;
+
+      m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+      if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+        ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
+        m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+      }
+      // Running averages: scale the stored average back to a sum, add this frame's
+      // percentage, then divide by the updated number of concealed frames.
+      int32_t iMbConcealedNum = m_pDecContext->iMbEcedNum + m_pDecContext->iMbEcedPropNum;
+      m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->iMbNum == 0 ?
+          (m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : ((
+                m_pDecContext->sDecoderStatistics.uiAvgEcRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + ((
+                      iMbConcealedNum * 100) / m_pDecContext->iMbNum));
+      m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->iMbNum == 0 ?
+          (m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) : ((
+                m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio * m_pDecContext->sDecoderStatistics.uiEcFrameNum) + ((
+                      m_pDecContext->iMbEcedPropNum * 100) / m_pDecContext->iMbNum));
+      m_pDecContext->sDecoderStatistics.uiEcFrameNum += (iMbConcealedNum == 0 ? 0 : 1);
+      m_pDecContext->sDecoderStatistics.uiAvgEcRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 0 :
+          m_pDecContext->sDecoderStatistics.uiAvgEcRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum;
+      m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio = m_pDecContext->sDecoderStatistics.uiEcFrameNum == 0 ? 0 :
+          m_pDecContext->sDecoderStatistics.uiAvgEcPropRatio / m_pDecContext->sDecoderStatistics.uiEcFrameNum;
+    }
+    iEnd = WelsTime();
+    m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+
+    OutputStatisticsLog (m_pDecContext->sDecoderStatistics);
+
+#ifdef  _PICTURE_REORDERING_
+    ReorderPicturesInDisplay (ppDst, pDstInfo);
+#endif
+
+    return (DECODING_STATE)m_pDecContext->iErrorCode;
+  }
+  // else Error free, the current codec works well
+
+  if (pDstInfo->iBufferStatus == 1) {
+
+    m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+    if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+      ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
+      m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+    }
+
+    OutputStatisticsLog (m_pDecContext->sDecoderStatistics);
+  }
+  iEnd = WelsTime();
+  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+
+#ifdef  _PICTURE_REORDERING_
+  ReorderPicturesInDisplay (ppDst, pDstInfo);
+#endif
+  return dsErrorFree;
+}
+
+// Hands out one buffered (reordered) picture, if any.
+//
+// When the end-of-stream flag is set and pictures are buffered, the buffered picture with the
+// smallest POC is selected. If a picture is selected (m_iMinPOC is valid), it is copied to
+// ppDst / pDstInfo and its slot is released; otherwise the outputs are left untouched.
+//
+// Returns dsInitialOptExpected when the decoder is not initialized, dsInvalidArgument when a
+// picture is available but ppDst or pDstInfo is NULL (the picture stays buffered), and
+// dsErrorFree otherwise.
+DECODING_STATE CWelsDecoder::FlushFrame (unsigned char** ppDst,
+    SBufferInfo* pDstInfo) {
+  if (m_pDecContext == NULL) {
+    if (m_pWelsTrace != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call FlushFrame without Initialize.\n");
+    }
+    return dsInitialOptExpected;
+  }
+
+  if (m_pDecContext->bEndOfStreamFlag && m_iNumOfPicts > 0) {
+    // Find the occupied slot (iPOC > sIMinInt32) with the smallest POC.
+    m_iMinPOC = sIMinInt32;
+    for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
+      if (m_sPictInfoList[i].iPOC > sIMinInt32
+          && (m_iMinPOC == sIMinInt32 || m_sPictInfoList[i].iPOC < m_iMinPOC)) {
+        m_iMinPOC = m_sPictInfoList[i].iPOC;
+        m_iPictInfoIndex = i;
+      }
+    }
+  }
+  if (m_iMinPOC > sIMinInt32) {
+    // Validate the outputs before touching any bookkeeping so that nothing is lost on error.
+    if (ppDst == NULL || pDstInfo == NULL) {
+      return dsInvalidArgument;
+    }
+    m_LastWrittenPOC = m_iMinPOC;
+#if defined (_DEBUG)
+#ifdef _MOTION_VECTOR_DUMP_
+    fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC);
+#endif
+#endif
+    memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+    ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0];
+    ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1];
+    ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2];
+    // Release the slot and invalidate the cached minimum.
+    m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32;
+    m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false;
+    m_iMinPOC = sIMinInt32;
+    --m_iNumOfPicts;
+  }
+  return dsErrorFree;
+}
+
+// Writes one INFO trace line with the decoder statistics every iStatisticsLogInterval decoded
+// frames; nothing is logged when the frame count or the interval is zero.
+// NOTE(review): m_pWelsTrace is dereferenced without a NULL check -- confirm callers guarantee it.
+void CWelsDecoder::OutputStatisticsLog (SDecoderStatistics& sDecoderStatistics) {
+  if ((sDecoderStatistics.uiDecodedFrameCount > 0) && (sDecoderStatistics.iStatisticsLogInterval > 0)
+      && ((sDecoderStatistics.uiDecodedFrameCount % sDecoderStatistics.iStatisticsLogInterval) == 0)) {
+    // The format string is continued with backslash-newline inside the literal, so the leading
+    // spaces of each continuation line are part of the emitted message.
+    // The arguments below are listed in the same order and grouping as the format fields.
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO,
+             "DecoderStatistics: uiWidth=%d, uiHeight=%d, fAverageFrameSpeedInMs=%.1f, fActualAverageFrameSpeedInMs=%.1f, \
+              uiDecodedFrameCount=%d, uiResolutionChangeTimes=%d, uiIDRCorrectNum=%d, \
+              uiAvgEcRatio=%d, uiAvgEcPropRatio=%d, uiEcIDRNum=%d, uiEcFrameNum=%d, \
+              uiIDRLostNum=%d, uiFreezingIDRNum=%d, uiFreezingNonIDRNum=%d, iAvgLumaQp=%d, \
+              iSpsReportErrorNum=%d, iSubSpsReportErrorNum=%d, iPpsReportErrorNum=%d, iSpsNoExistNalNum=%d, iSubSpsNoExistNalNum=%d, iPpsNoExistNalNum=%d, \
+              uiProfile=%d, uiLevel=%d, \
+              iCurrentActiveSpsId=%d, iCurrentActivePpsId=%d,",
+             sDecoderStatistics.uiWidth,
+             sDecoderStatistics.uiHeight,
+             sDecoderStatistics.fAverageFrameSpeedInMs,
+             sDecoderStatistics.fActualAverageFrameSpeedInMs,
+
+             sDecoderStatistics.uiDecodedFrameCount,
+             sDecoderStatistics.uiResolutionChangeTimes,
+             sDecoderStatistics.uiIDRCorrectNum,
+
+             sDecoderStatistics.uiAvgEcRatio,
+             sDecoderStatistics.uiAvgEcPropRatio,
+             sDecoderStatistics.uiEcIDRNum,
+             sDecoderStatistics.uiEcFrameNum,
+
+             sDecoderStatistics.uiIDRLostNum,
+             sDecoderStatistics.uiFreezingIDRNum,
+             sDecoderStatistics.uiFreezingNonIDRNum,
+             sDecoderStatistics.iAvgLumaQp,
+
+             sDecoderStatistics.iSpsReportErrorNum,
+             sDecoderStatistics.iSubSpsReportErrorNum,
+             sDecoderStatistics.iPpsReportErrorNum,
+             sDecoderStatistics.iSpsNoExistNalNum,
+             sDecoderStatistics.iSubSpsNoExistNalNum,
+             sDecoderStatistics.iPpsNoExistNalNum,
+
+             sDecoderStatistics.uiProfile,
+             sDecoderStatistics.uiLevel,
+
+             sDecoderStatistics.iCurrentActiveSpsId,
+             sDecoderStatistics.iCurrentActivePpsId);
+  }
+}
+
+// Buffers the picture that was just decoded and, where possible, replaces the output with a
+// buffered picture that has a smaller POC.
+//
+// The reordering is only active when a picture was produced (iBufferStatus == 1), the SPS
+// profile_idc is not 66 (Baseline) and the PPS has bEntropyCodingModeFlag set; otherwise
+// ppDst / pDstInfo are left untouched. Always returns dsErrorFree.
+// NOTE(review): pSps, pPps and pSliceHeader are dereferenced without NULL checks -- confirm they
+// are always valid whenever iBufferStatus == 1.
+DECODING_STATE CWelsDecoder::ReorderPicturesInDisplay (unsigned char** ppDst, SBufferInfo* pDstInfo) {
+  if (pDstInfo->iBufferStatus == 1 && m_pDecContext->pSps->uiProfileIdc != 66
+      && m_pDecContext->pPps->bEntropyCodingModeFlag) {
+    // A picture with POC lsb 0 is handled as the start of a new group: everything still buffered
+    // is tagged bLastGOP so that it is drained before pictures of the new group.
+    if (m_pDecContext->pSliceHeader->iPicOrderCntLsb == 0) {
+      if (m_iNumOfPicts > 0) {
+        m_iLastGOPRemainPicts = m_iNumOfPicts;
+        for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
+          if (m_sPictInfoList[i].iPOC > sIMinInt32) {
+            m_sPictInfoList[i].bLastGOP = true;
+          }
+        }
+      }
+    }
+    // Store the current picture in the first free slot (iPOC == sIMinInt32). Clearing
+    // iBufferStatus means nothing is output unless a buffered picture is selected below.
+    // NOTE(review): if all 16 slots are occupied the picture is not stored and iBufferStatus
+    // stays 1 -- confirm this cannot happen or is acceptable.
+    for (int32_t i = 0; i < 16; ++i) {
+      if (m_sPictInfoList[i].iPOC == sIMinInt32) {
+        memcpy (&m_sPictInfoList[i].sBufferInfo, pDstInfo, sizeof (SBufferInfo));
+        m_sPictInfoList[i].pData[0] = ppDst[0];
+        m_sPictInfoList[i].pData[1] = ppDst[1];
+        m_sPictInfoList[i].pData[2] = ppDst[2];
+        m_sPictInfoList[i].iPOC = m_pDecContext->pSliceHeader->iPicOrderCntLsb;
+        m_sPictInfoList[i].iFrameNum = m_pDecContext->pSliceHeader->iFrameNum;
+        m_sPictInfoList[i].bLastGOP = false;
+        pDstInfo->iBufferStatus = 0;
+        ++m_iNumOfPicts;
+        if (i > m_iLargestBufferedPicIndex) {
+          m_iLargestBufferedPicIndex = i;
+        }
+        break;
+      }
+    }
+    // Drain phase: while pictures tagged bLastGOP remain, output the one with the smallest POC
+    // among them, one per call.
+    // NOTE(review): if no tagged slot is found, m_iPictInfoIndex keeps its previous value and that
+    // slot is output anyway -- confirm the counters make this impossible.
+    if (m_iLastGOPRemainPicts > 0) {
+      m_iMinPOC = sIMinInt32;
+      for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
+        if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].bLastGOP) {
+          m_iMinPOC = m_sPictInfoList[i].iPOC;
+          m_iPictInfoIndex = i;
+        }
+        if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC && m_sPictInfoList[i].bLastGOP) {
+          m_iMinPOC = m_sPictInfoList[i].iPOC;
+          m_iPictInfoIndex = i;
+        }
+      }
+      m_LastWrittenPOC = m_iMinPOC;
+#if defined (_DEBUG)
+#ifdef _MOTION_VECTOR_DUMP_
+      fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC);
+#endif
+#endif
+      memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+      ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0];
+      ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1];
+      ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2];
+      m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32;
+      m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false;
+      m_iMinPOC = sIMinInt32;
+      --m_iNumOfPicts;
+      --m_iLastGOPRemainPicts;
+      // Once the previous group is fully drained, forget the last written POC.
+      if (m_iLastGOPRemainPicts == 0) {
+        m_LastWrittenPOC = sIMinInt32;
+      }
+      return dsErrorFree;
+    }
+    // Normal phase: find the buffered picture with the smallest POC.
+    if (m_iNumOfPicts > 0) {
+      m_iMinPOC = sIMinInt32;
+      for (int32_t i = 0; i <= m_iLargestBufferedPicIndex; ++i) {
+        if (m_iMinPOC == sIMinInt32 && m_sPictInfoList[i].iPOC > sIMinInt32) {
+          m_iMinPOC = m_sPictInfoList[i].iPOC;
+          m_iPictInfoIndex = i;
+        }
+        if (m_sPictInfoList[i].iPOC > sIMinInt32 && m_sPictInfoList[i].iPOC < m_iMinPOC) {
+          m_iMinPOC = m_sPictInfoList[i].iPOC;
+          m_iPictInfoIndex = i;
+        }
+      }
+    }
+    // Output it only when it is at most 1 above the last written POC, or when it is smaller than
+    // the POC lsb of the picture just decoded; otherwise it stays buffered (and m_iMinPOC stays set).
+    if (m_iMinPOC > sIMinInt32) {
+      if ((m_LastWrittenPOC > sIMinInt32 && m_iMinPOC - m_LastWrittenPOC <= 1)
+          || m_iMinPOC < m_pDecContext->pSliceHeader->iPicOrderCntLsb) {
+        m_LastWrittenPOC = m_iMinPOC;
+#if defined (_DEBUG)
+#ifdef _MOTION_VECTOR_DUMP_
+        fprintf (stderr, "Output POC: #%d\n", m_LastWrittenPOC);
+#endif
+#endif
+        memcpy (pDstInfo, &m_sPictInfoList[m_iPictInfoIndex].sBufferInfo, sizeof (SBufferInfo));
+        ppDst[0] = m_sPictInfoList[m_iPictInfoIndex].pData[0];
+        ppDst[1] = m_sPictInfoList[m_iPictInfoIndex].pData[1];
+        ppDst[2] = m_sPictInfoList[m_iPictInfoIndex].pData[2];
+        m_sPictInfoList[m_iPictInfoIndex].iPOC = sIMinInt32;
+        m_sPictInfoList[m_iPictInfoIndex].bLastGOP = false;
+        m_iMinPOC = sIMinInt32;
+        --m_iNumOfPicts;
+        return dsErrorFree;
+      }
+    }
+  }
+
+  return dsErrorFree;
+}
+
+// Parse-only counterpart of DecodeFrame2(): parses one chunk of bit-stream and reports the
+// collected NAL information through pDstInfo instead of producing pictures.
+//
+// kpSrc / kiSrcLen  input bit-stream; NULL or a length <= 0 is treated as end of stream
+// pDstInfo          in: uiInBsTimeStamp; out: parser result (copied from the context once a
+//                   frame is complete and at least one NAL was collected)
+//
+// Returns dsInitialOptExpected when the decoder is not initialized, dsInvalidArgument when the
+// decoder is not in parse-only mode or pDstInfo is NULL, dsOutOfMemory or dsErrorFree after a
+// decoder reset (depending on ResetDecoder()'s result), and the context's iErrorCode otherwise.
+DECODING_STATE CWelsDecoder::DecodeParser (const unsigned char* kpSrc,
+    const int kiSrcLen,
+    SParserBsInfo* pDstInfo) {
+  if (m_pDecContext == NULL || m_pDecContext->pParam == NULL) {
+    if (m_pWelsTrace != NULL) {
+      WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "Call DecodeParser without Initialize.\n");
+    }
+    return dsInitialOptExpected;
+  }
+
+  if (!m_pDecContext->pParam->bParseOnly) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "bParseOnly should be true for this API calling! \n");
+    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    return dsInvalidArgument;
+  }
+  // pDstInfo is written unconditionally below, so reject NULL instead of dereferencing it.
+  if (pDstInfo == NULL) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_ERROR, "DecodeParser called with NULL output argument.\n");
+    m_pDecContext->iErrorCode |= dsInvalidArgument;
+    return dsInvalidArgument;
+  }
+  int64_t iEnd, iStart = WelsTime();
+  if (CheckBsBuffer (m_pDecContext, kiSrcLen)) {
+    if (ResetDecoder())
+      return dsOutOfMemory;
+
+    return dsErrorFree;
+  }
+  if (kiSrcLen > 0 && kpSrc != NULL) {
+    // Same macro as everywhere else in this file (it was misspelled OUTPUT_BITSTREAM here,
+    // which silently disabled the dump for the parser path).
+#ifdef OUTPUT_BIT_STREAM
+    if (m_pFBS) {
+      WelsFwrite (kpSrc, sizeof (unsigned char), kiSrcLen, m_pFBS);
+      WelsFflush (m_pFBS);
+    }
+#endif//OUTPUT_BIT_STREAM
+    m_pDecContext->bEndOfStreamFlag = false;
+  } else {
+    //For application MODE, the error detection should be added for safe.
+    //But for CONSOLE MODE, when decoding LAST AU, kiSrcLen==0 && kpSrc==NULL.
+    m_pDecContext->bEndOfStreamFlag = true;
+    m_pDecContext->bInstantDecFlag = true;
+  }
+
+  m_pDecContext->iErrorCode = dsErrorFree; //initialize at the starting of AU decoding.
+  m_pDecContext->pParam->eEcActiveIdc = ERROR_CON_DISABLE; //add protection to disable EC here.
+  m_pDecContext->iFeedbackNalRefIdc = -1; //initialize
+  if (!m_pDecContext->bFramePending) { //frame complete
+    m_pDecContext->pParserBsInfo->iNalNum = 0;
+    // NOTE(review): the length is MAX_NAL_UNITS_IN_LAYER bytes; if the elements of pNalLenInByte
+    // are wider than one byte only part of the array is cleared -- confirm against its declaration.
+    memset (m_pDecContext->pParserBsInfo->pNalLenInByte, 0, MAX_NAL_UNITS_IN_LAYER);
+  }
+  pDstInfo->iNalNum = 0;
+  pDstInfo->iSpsWidthInPixel = pDstInfo->iSpsHeightInPixel = 0;
+  m_pDecContext->uiTimeStamp = pDstInfo->uiInBsTimeStamp;
+  pDstInfo->uiOutBsTimeStamp = 0;
+  WelsDecodeBs (m_pDecContext, kpSrc, kiSrcLen, NULL, NULL, pDstInfo);
+  if (m_pDecContext->iErrorCode & dsOutOfMemory) {
+    if (ResetDecoder())
+      return dsOutOfMemory;
+    return dsErrorFree;
+  }
+
+  // Publish the parser result once the frame is complete and at least one NAL was collected.
+  if (!m_pDecContext->bFramePending && m_pDecContext->pParserBsInfo->iNalNum) {
+    memcpy (pDstInfo, m_pDecContext->pParserBsInfo, sizeof (SParserBsInfo));
+
+    if (m_pDecContext->iErrorCode == ERR_NONE) { //update statistics: decoding frame count
+      m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+      if (m_pDecContext->sDecoderStatistics.uiDecodedFrameCount == 0) { //exceed max value of uint32_t
+        ResetDecStatNums (&m_pDecContext->sDecoderStatistics);
+        m_pDecContext->sDecoderStatistics.uiDecodedFrameCount++;
+      }
+    }
+  }
+
+  m_pDecContext->bInstantDecFlag = false; //reset no-delay flag
+
+  if (m_pDecContext->iErrorCode && m_pDecContext->bPrintFrameErrorTraceFlag) {
+    WelsLog (&m_pWelsTrace->m_sLogCtx, WELS_LOG_INFO, "decode failed, failure type:%d \n", m_pDecContext->iErrorCode);
+    m_pDecContext->bPrintFrameErrorTraceFlag = false;
+  }
+  iEnd = WelsTime();
+  m_pDecContext->dDecTime += (iEnd - iStart) / 1e3;
+
+  return (DECODING_STATE) m_pDecContext->iErrorCode;
+}
+
+// Thin wrapper around DecodeFrame2() that exchanges the picture geometry through plain
+// arguments: pStride[0..1], iWidth and iHeight are passed in via a zero-initialized SBufferInfo
+// and are updated from it only when decoding returns dsErrorFree.
+DECODING_STATE CWelsDecoder::DecodeFrame (const unsigned char* kpSrc,
+    const int kiSrcLen,
+    unsigned char** ppDst,
+    int* pStride,
+    int& iWidth,
+    int& iHeight) {
+  SBufferInfo sFrameInfo;
+  memset (&sFrameInfo, 0, sizeof (SBufferInfo));
+
+  sFrameInfo.UsrData.sSystemBuffer.iWidth = iWidth;
+  sFrameInfo.UsrData.sSystemBuffer.iHeight = iHeight;
+  sFrameInfo.UsrData.sSystemBuffer.iStride[0] = pStride[0];
+  sFrameInfo.UsrData.sSystemBuffer.iStride[1] = pStride[1];
+
+  const DECODING_STATE eResult = DecodeFrame2 (kpSrc, kiSrcLen, ppDst, &sFrameInfo);
+  if (eResult != dsErrorFree) {
+    // Leave the caller's geometry untouched on any error.
+    return eResult;
+  }
+
+  iWidth     = sFrameInfo.UsrData.sSystemBuffer.iWidth;
+  iHeight    = sFrameInfo.UsrData.sSystemBuffer.iHeight;
+  pStride[0] = sFrameInfo.UsrData.sSystemBuffer.iStride[0];
+  pStride[1] = sFrameInfo.UsrData.sSystemBuffer.iStride[1];
+  return eResult;
+}
+
+// Stub: performs no decoding, leaves every argument untouched and always reports dsErrorFree.
+DECODING_STATE CWelsDecoder::DecodeFrameEx (const unsigned char* kpSrc,
+    const int kiSrcLen,
+    unsigned char* pDst,
+    int iDstStride,
+    int& iDstLen,
+    int& iWidth,
+    int& iHeight,
+    int& iColorFormat) {
+  return dsErrorFree;
+}
+
+
+} // namespace WelsDec
+
+
+using namespace WelsDec;
+/*
+*       WelsGetDecoderCapability
+*       Fills *pDecCapability with the fixed capability set advertised by this decoder.
+*       @param pDecCapability: [out] capability structure to fill; must not be NULL
+*       @return: ERR_NONE on success, ERR_INVALID_PARAMETERS when pDecCapability is NULL
+*/
+int WelsGetDecoderCapability (SDecoderCapability* pDecCapability) {
+  // Validate the argument like WelsCreateDecoder() does instead of writing through NULL.
+  if (NULL == pDecCapability) {
+    return ERR_INVALID_PARAMETERS;
+  }
+
+  memset (pDecCapability, 0, sizeof (SDecoderCapability));
+  pDecCapability->iProfileIdc = 66; //Baseline
+  pDecCapability->iProfileIop = 0xE0; //11100000b
+  pDecCapability->iLevelIdc = 32; //level_idc = 3.2
+  pDecCapability->iMaxMbps = 216000; //from level_idc = 3.2
+  pDecCapability->iMaxFs = 5120; //from level_idc = 3.2
+  pDecCapability->iMaxCpb = 20000; //from level_idc = 3.2
+  pDecCapability->iMaxDpb = 20480; //from level_idc = 3.2
+  pDecCapability->iMaxBr = 20000; //from level_idc = 3.2
+  pDecCapability->bRedPicCap = 0; //not support redundant pic
+
+  return ERR_NONE;
+}
+/* WINAPI is indeed in prefix due to sync to application layer callings!! */
+
+/*
+*   WelsCreateDecoder
+*   Creates a CWelsDecoder and stores it in *ppDecoder.
+*   @return:    0 (ERR_NONE) on success; ERR_INVALID_PARAMETERS when ppDecoder is NULL,
+*               ERR_MALLOC_FAILED when no decoder object was obtained.
+*/
+long WelsCreateDecoder (ISVCDecoder** ppDecoder) {
+  if (ppDecoder == NULL)
+    return ERR_INVALID_PARAMETERS;
+
+  *ppDecoder = new CWelsDecoder();
+
+  if (*ppDecoder != NULL)
+    return ERR_NONE;
+
+  return ERR_MALLOC_FAILED;
+}
+
+/*
+*   WelsDestroyDecoder
+*   Destroys a decoder obtained from WelsCreateDecoder; a NULL pDecoder is ignored.
+*/
+void WelsDestroyDecoder (ISVCDecoder* pDecoder) {
+  // Deleting a NULL pointer is a no-op, so no explicit guard is required.
+  CWelsDecoder* pImpl = (CWelsDecoder*)pDecoder;
+  delete pImpl;
+}
--- /dev/null
+++ b/codec/vsproject/windecoder.sln
@@ -1,0 +1,28 @@
+
+Microsoft Visual Studio Solution File, Format Version 12.00
+# Visual Studio 14
+VisualStudioVersion = 14.0.25420.1
+MinimumVisualStudioVersion = 10.0.40219.1
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "windecoder", "windecoder.vcxproj", "{155C0CE7-12D7-412D-8B26-1C52C7346B3A}"
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug|x64 = Debug|x64
+		Debug|x86 = Debug|x86
+		Release|x64 = Release|x64
+		Release|x86 = Release|x86
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Debug|x64.ActiveCfg = Debug|x64
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Debug|x64.Build.0 = Debug|x64
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Debug|x86.ActiveCfg = Debug|Win32
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Debug|x86.Build.0 = Debug|Win32
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Release|x64.ActiveCfg = Release|x64
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Release|x64.Build.0 = Release|x64
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Release|x86.ActiveCfg = Release|Win32
+		{155C0CE7-12D7-412D-8B26-1C52C7346B3A}.Release|x86.Build.0 = Release|Win32
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
--- /dev/null
+++ b/codec/vsproject/windecoder.vcxproj
@@ -1,0 +1,378 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{155C0CE7-12D7-412D-8B26-1C52C7346B3A}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>windecoder</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+  </ImportGroup>
+  <ImportGroup Label="Shared">
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <!-- Debug|Win32 compile and link settings (console subsystem, optimisation disabled). -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <!-- Same header search path as the x64 configurations, so the project headers listed in this file can be found. -->
+      <AdditionalIncludeDirectories>..\decoder\core\inc;..\common\inc;..\console\common\inc;..\api\svc;..\console\dec\inc;..\decoder\plus\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <!-- Debug|x64 compile and link settings: optimisation disabled, console subsystem,
+         with _MOTION_VECTOR_DUMP_ and _CRT_SECURE_NO_WARNINGS defined.
+         NOTE(review): unlike Release|x64 this list does not define X86_ASM, although the
+         NASM custom-build steps in this project are also enabled for Debug|x64; confirm
+         that this is intended. -->
+    <ClCompile>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>_DEBUG;_CONSOLE;X64;HAVE_AVX2;_CRT_SECURE_NO_WARNINGS;_MOTION_VECTOR_DUMP_;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>..\decoder\core\inc;..\common\inc;..\console\common\inc;..\api\svc;..\console\dec\inc;..\decoder\plus\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <!-- Release|Win32 compile and link settings (console subsystem, MaxSpeed optimisation). -->
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <!-- Same header search path as the x64 configurations, so the project headers listed in this file can be found. -->
+      <AdditionalIncludeDirectories>..\decoder\core\inc;..\common\inc;..\console\common\inc;..\api\svc;..\console\dec\inc;..\decoder\plus\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>
+      </PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>NDEBUG;_CONSOLE;X64;HAVE_AVX2;X86_ASM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <AdditionalIncludeDirectories>..\decoder\core\inc;..\common\inc;..\console\common\inc;..\api\svc;..\console\dec\inc;..\decoder\plus\inc;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <ClInclude Include="..\common\inc\copy_mb.h" />
+    <ClInclude Include="..\common\inc\cpu.h" />
+    <ClInclude Include="..\common\inc\cpu_core.h" />
+    <ClInclude Include="..\common\inc\crt_util_safe_x.h" />
+    <ClInclude Include="..\common\inc\deblocking_common.h" />
+    <ClInclude Include="..\common\inc\expand_pic.h" />
+    <ClInclude Include="..\common\inc\golomb_common.h" />
+    <ClInclude Include="..\common\inc\intra_pred_common.h" />
+    <ClInclude Include="..\common\inc\ls_defines.h" />
+    <ClInclude Include="..\common\inc\macros.h" />
+    <ClInclude Include="..\common\inc\mc.h" />
+    <ClInclude Include="..\common\inc\measure_time.h" />
+    <ClInclude Include="..\common\inc\memory_align.h" />
+    <ClInclude Include="..\common\inc\sad_common.h" />
+    <ClInclude Include="..\common\inc\typedefs.h" />
+    <ClInclude Include="..\common\inc\utils.h" />
+    <ClInclude Include="..\common\inc\version.h" />
+    <ClInclude Include="..\common\inc\welsCodecTrace.h" />
+    <ClInclude Include="..\common\inc\WelsList.h" />
+    <ClInclude Include="..\common\inc\WelsLock.h" />
+    <ClInclude Include="..\common\inc\WelsTask.h" />
+    <ClInclude Include="..\common\inc\WelsTaskThread.h" />
+    <ClInclude Include="..\common\inc\WelsThread.h" />
+    <ClInclude Include="..\common\inc\WelsThreadLib.h" />
+    <ClInclude Include="..\common\inc\WelsThreadPool.h" />
+    <ClInclude Include="..\common\inc\wels_common_defs.h" />
+    <ClInclude Include="..\common\inc\wels_const_common.h" />
+    <ClInclude Include="..\console\common\inc\read_config.h" />
+    <ClInclude Include="..\console\dec\inc\d3d9_utils.h" />
+    <ClInclude Include="..\decoder\core\inc\au_parser.h" />
+    <ClInclude Include="..\decoder\core\inc\bit_stream.h" />
+    <ClInclude Include="..\decoder\core\inc\cabac_decoder.h" />
+    <ClInclude Include="..\decoder\core\inc\deblocking.h" />
+    <ClInclude Include="..\decoder\core\inc\decoder.h" />
+    <ClInclude Include="..\decoder\core\inc\decoder_context.h" />
+    <ClInclude Include="..\decoder\core\inc\decoder_core.h" />
+    <ClInclude Include="..\decoder\core\inc\decode_mb_aux.h" />
+    <ClInclude Include="..\decoder\core\inc\decode_slice.h" />
+    <ClInclude Include="..\decoder\core\inc\dec_frame.h" />
+    <ClInclude Include="..\decoder\core\inc\dec_golomb.h" />
+    <ClInclude Include="..\decoder\core\inc\error_code.h" />
+    <ClInclude Include="..\decoder\core\inc\error_concealment.h" />
+    <ClInclude Include="..\decoder\core\inc\fmo.h" />
+    <ClInclude Include="..\decoder\core\inc\get_intra_predictor.h" />
+    <ClInclude Include="..\decoder\core\inc\manage_dec_ref.h" />
+    <ClInclude Include="..\decoder\core\inc\mb_cache.h" />
+    <ClInclude Include="..\decoder\core\inc\memmgr_nal_unit.h" />
+    <ClInclude Include="..\decoder\core\inc\mv_pred.h" />
+    <ClInclude Include="..\decoder\core\inc\nalu.h" />
+    <ClInclude Include="..\decoder\core\inc\nal_prefix.h" />
+    <ClInclude Include="..\decoder\core\inc\parameter_sets.h" />
+    <ClInclude Include="..\decoder\core\inc\parse_mb_syn_cabac.h" />
+    <ClInclude Include="..\decoder\core\inc\parse_mb_syn_cavlc.h" />
+    <ClInclude Include="..\decoder\core\inc\picture.h" />
+    <ClInclude Include="..\decoder\core\inc\pic_queue.h" />
+    <ClInclude Include="..\decoder\core\inc\rec_mb.h" />
+    <ClInclude Include="..\decoder\core\inc\slice.h" />
+    <ClInclude Include="..\decoder\core\inc\vlc_decoder.h" />
+    <ClInclude Include="..\decoder\core\inc\wels_common_basis.h" />
+    <ClInclude Include="..\decoder\core\inc\wels_const.h" />
+    <ClInclude Include="..\decoder\plus\inc\welsDecoderExt.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="..\common\src\common_tables.cpp" />
+    <ClCompile Include="..\common\src\copy_mb.cpp" />
+    <ClCompile Include="..\common\src\cpu.cpp" />
+    <ClCompile Include="..\common\src\crt_util_safe_x.cpp" />
+    <ClCompile Include="..\common\src\deblocking_common.cpp" />
+    <ClCompile Include="..\common\src\expand_pic.cpp" />
+    <ClCompile Include="..\common\src\intra_pred_common.cpp" />
+    <ClCompile Include="..\common\src\mc.cpp" />
+    <ClCompile Include="..\common\src\memory_align.cpp" />
+    <ClCompile Include="..\common\src\sad_common.cpp" />
+    <ClCompile Include="..\common\src\utils.cpp" />
+    <ClCompile Include="..\common\src\welsCodecTrace.cpp" />
+    <ClCompile Include="..\common\src\WelsTaskThread.cpp" />
+    <ClCompile Include="..\common\src\WelsThread.cpp" />
+    <ClCompile Include="..\common\src\WelsThreadLib.cpp" />
+    <ClCompile Include="..\common\src\WelsThreadPool.cpp" />
+    <ClCompile Include="..\console\common\src\read_config.cpp" />
+    <ClCompile Include="..\console\dec\src\d3d9_utils.cpp" />
+    <ClCompile Include="..\console\dec\src\h264dec.cpp" />
+    <ClCompile Include="..\decoder\core\src\au_parser.cpp" />
+    <ClCompile Include="..\decoder\core\src\bit_stream.cpp" />
+    <ClCompile Include="..\decoder\core\src\cabac_decoder.cpp" />
+    <ClCompile Include="..\decoder\core\src\deblocking.cpp" />
+    <ClCompile Include="..\decoder\core\src\decoder.cpp" />
+    <ClCompile Include="..\decoder\core\src\decoder_core.cpp" />
+    <ClCompile Include="..\decoder\core\src\decoder_data_tables.cpp" />
+    <ClCompile Include="..\decoder\core\src\decode_mb_aux.cpp" />
+    <ClCompile Include="..\decoder\core\src\decode_slice.cpp" />
+    <ClCompile Include="..\decoder\core\src\error_concealment.cpp" />
+    <ClCompile Include="..\decoder\core\src\fmo.cpp" />
+    <ClCompile Include="..\decoder\core\src\get_intra_predictor.cpp" />
+    <ClCompile Include="..\decoder\core\src\manage_dec_ref.cpp" />
+    <ClCompile Include="..\decoder\core\src\memmgr_nal_unit.cpp" />
+    <ClCompile Include="..\decoder\core\src\mv_pred.cpp" />
+    <ClCompile Include="..\decoder\core\src\parse_mb_syn_cabac.cpp" />
+    <ClCompile Include="..\decoder\core\src\parse_mb_syn_cavlc.cpp" />
+    <ClCompile Include="..\decoder\core\src\pic_queue.cpp" />
+    <ClCompile Include="..\decoder\core\src\rec_mb.cpp" />
+    <ClCompile Include="..\decoder\plus\src\welsDecoderExt.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\common\x86\asm_inc.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\cpuid.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\dct.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\deblock.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\expand_picture.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\intra_pred_com.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\mb_copy.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\mc_chroma.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\mc_luma.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\satd_sad.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\vaa.asm">
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+    <CustomBuild Include="..\decoder\core\x86\intra_pred.asm">
+      <FileType>Document</FileType>
+      <!-- Assembled with NASM for the x64 configurations only; Debug and Release share one command line with ../common/x86/ as the include path. -->
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/ -o "$(OutDir)%(Filename).obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename).obj;%(Outputs)</Outputs>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\decoder\core\x86\dct.asm">
+      <!-- Assembled with NASM for the x64 configurations only. The object is written as
+           %(Filename)_dec.obj, which keeps it distinct from the object produced for
+           ..\common\x86\dct.asm in the same $(OutDir).
+           NOTE(review): -I../hdecoder/incl names a directory that nothing else in this
+           project references; confirm it is still needed. -->
+      <FileType>Document</FileType>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/  -I../hdecoder/incl -o "$(OutDir)%(Filename)_dec.obj" "%(FullPath)"</Command>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Assembling by NASM - %(Filename)_dec%(Extension)</Message>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)%(Filename)_dec.obj;%(Outputs)</Outputs>
+      <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">nasm -f win64 -DWIN64  -DHAVE_AVX2 -I../common/x86/  -I../hdecoder/incl -o "$(OutDir)%(Filename)_dec.obj" "%(FullPath)"</Command>
+      <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)%(Filename)_dec.obj;%(Outputs)</Outputs>
+      <Message Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Assembling by NASM - %(Filename)_dec%(Extension)</Message>
+    </CustomBuild>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+  </ImportGroup>
+</Project>
\ No newline at end of file
--- /dev/null
+++ b/codec/vsproject/windecoder.vcxproj.filters
@@ -1,0 +1,401 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <ClCompile Include="..\decoder\core\src\au_parser.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\bit_stream.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\cabac_decoder.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\deblocking_common.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\deblocking.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\decode_mb_aux.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\decode_slice.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\decoder_data_tables.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\error_concealment.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\decoder_core.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\decoder.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\expand_pic.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\get_intra_predictor.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\intra_pred_common.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\manage_dec_ref.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\parse_mb_syn_cabac.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\parse_mb_syn_cavlc.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\memmgr_nal_unit.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\pic_queue.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\memory_align.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\mv_pred.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\utils.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\common_tables.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\copy_mb.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\cpu.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\rec_mb.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\core\src\fmo.cpp">
+      <Filter>codec\decoder\core\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\welsCodecTrace.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\mc.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\crt_util_safe_x.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\sad_common.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\WelsTaskThread.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\WelsThread.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\WelsThreadLib.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\common\src\WelsThreadPool.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\console\common\src\read_config.cpp">
+      <Filter>codec\common\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\console\dec\src\h264dec.cpp">
+      <Filter>codec\console\dec\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\console\dec\src\d3d9_utils.cpp">
+      <Filter>codec\console\dec\src</Filter>
+    </ClCompile>
+    <ClCompile Include="..\decoder\plus\src\welsDecoderExt.cpp">
+      <Filter>codec\decoder\plus\src</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="..\decoder\core\inc\au_parser.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\bit_stream.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\cabac_decoder.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\deblocking_common.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\deblocking.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\decode_mb_aux.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\dec_golomb.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\decode_slice.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\decoder_core.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\decoder_context.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\decoder.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\error_code.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\get_intra_predictor.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\manage_dec_ref.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\error_concealment.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\parse_mb_syn_cabac.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\parse_mb_syn_cavlc.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\parameter_sets.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\memmgr_nal_unit.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\pic_queue.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\picture.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\memory_align.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\mv_pred.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\nal_prefix.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\nalu.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\utils.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\copy_mb.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\rec_mb.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\mb_cache.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\vlc_decoder.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\fmo.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\welsCodecTrace.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\typedefs.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\expand_pic.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\golomb_common.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\wels_common_basis.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\wels_common_defs.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\intra_pred_common.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\cpu.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\wels_const_common.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\wels_const.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\slice.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\mc.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\macros.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\ls_defines.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\cpu_core.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\crt_util_safe_x.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\sad_common.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\WelsList.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\WelsLock.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\WelsTask.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\WelsTaskThread.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\WelsThread.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\WelsThreadLib.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\WelsThreadPool.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\measure_time.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\common\inc\version.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\console\common\inc\read_config.h">
+      <Filter>codec\common\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\core\inc\dec_frame.h">
+      <Filter>codec\decoder\core\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\console\dec\inc\d3d9_utils.h">
+      <Filter>codec\console\dec\inc</Filter>
+    </ClInclude>
+    <ClInclude Include="..\decoder\plus\inc\welsDecoderExt.h">
+      <Filter>codec\decoder\plus\inc</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <CustomBuild Include="..\common\x86\mc_luma.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\intra_pred_com.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\expand_picture.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\deblock.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\decoder\core\x86\dct.asm">
+      <Filter>codec\decoder\core\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\dct.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\cpuid.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\asm_inc.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\decoder\core\x86\intra_pred.asm">
+      <Filter>codec\decoder\core\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\mc_chroma.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\mb_copy.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\satd_sad.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+    <CustomBuild Include="..\common\x86\vaa.asm">
+      <Filter>codec\common\x86</Filter>
+    </CustomBuild>
+  </ItemGroup>
+  <ItemGroup>
+    <Filter Include="codec">
+      <UniqueIdentifier>{0e369d4c-f75f-4348-ae15-39cadfd3c9d7}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder">
+      <UniqueIdentifier>{fbb13c60-d542-4bc3-82aa-bd7a5c1e565a}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder\core">
+      <UniqueIdentifier>{dba9698e-b19e-404c-bbf2-aad8d9da829a}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder\core\inc">
+      <UniqueIdentifier>{e4862e81-1fc9-4157-bb4c-8eff694fbcb8}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder\core\src">
+      <UniqueIdentifier>{44a3510f-aa48-4798-a564-12d871a4a471}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\common">
+      <UniqueIdentifier>{a76f3531-c469-408d-8ade-8ca0e3a745d1}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\common\inc">
+      <UniqueIdentifier>{008dbff3-97f0-4c99-9b77-9a013559db02}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\common\src">
+      <UniqueIdentifier>{314d2179-c124-4a8f-a516-032c81ba0b7b}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\common\x86">
+      <UniqueIdentifier>{f49d90d8-48a2-43c2-ab83-95fe0baf726e}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder\core\x86">
+      <UniqueIdentifier>{77022dca-1ede-417b-bf72-ff6d2fb2de4e}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\console">
+      <UniqueIdentifier>{d3858359-ad7b-4dc1-a883-9e191b1c8c36}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\console\dec">
+      <UniqueIdentifier>{498f57ca-2c2d-4bc7-b2ea-3bb729cf751c}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\console\dec\src">
+      <UniqueIdentifier>{335c9bd0-992b-4020-8947-57c8377753dd}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\console\dec\inc">
+      <UniqueIdentifier>{ff44409c-81b3-408b-b96c-0398fb26d760}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder\plus">
+      <UniqueIdentifier>{b0596b35-e849-4f25-b627-ad777adcd648}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder\plus\src">
+      <UniqueIdentifier>{d7cadc69-1c45-4f5c-b866-a6fda8d7056a}</UniqueIdentifier>
+    </Filter>
+    <Filter Include="codec\decoder\plus\inc">
+      <UniqueIdentifier>{bf350651-afd7-4525-87d8-76ea18f2c051}</UniqueIdentifier>
+    </Filter>
+  </ItemGroup>
+</Project>
\ No newline at end of file
binary files /dev/null b/res/HighProfile_B_Frame_1280x544_2397p.h264 differ
binary files /dev/null b/res/HighProfile_B_Frame_1280x720_2397p.h264 differ
binary files /dev/null b/res/HighProfile_B_Frame_1920x1080p_2397fps.h264 differ
binary files /dev/null b/res/HighProfile_B_Frame_1920x1080p_30fps.h264 differ
--- a/test/BaseDecoderTest.h
+++ b/test/BaseDecoderTest.h
@@ -1,52 +1,53 @@
-#ifndef __BASEDECODERTEST_H__
-#define __BASEDECODERTEST_H__
-
-#include "test_stdint.h"
-#include <limits.h>
-#include <fstream>
-#include "codec_api.h"
-
-#include "utils/BufferedData.h"
-
-class BaseDecoderTest {
- public:
-  struct Plane {
-    const uint8_t* data;
-    int width;
-    int height;
-    int stride;
-  };
-
-  struct Frame {
-    Plane y;
-    Plane u;
-    Plane v;
-  };
-
-  struct Callback {
-    virtual void onDecodeFrame (const Frame& frame) = 0;
-  };
-
-  BaseDecoderTest();
-  int32_t SetUp();
-  void TearDown();
-  void DecodeFile (const char* fileName, Callback* cbk);
-
-  bool Open (const char* fileName);
-  bool DecodeNextFrame (Callback* cbk);
-  ISVCDecoder* decoder_;
-
- private:
-  void DecodeFrame (const uint8_t* src, size_t sliceSize, Callback* cbk);
-
-  std::ifstream file_;
-  BufferedData buf_;
-  enum {
-    OpenFile,
-    Decoding,
-    EndOfStream,
-    End
-  } decodeStatus_;
-};
-
-#endif //__BASEDECODERTEST_H__
+#ifndef __BASEDECODERTEST_H__
+#define __BASEDECODERTEST_H__
+
+#include "test_stdint.h"
+#include <limits.h>
+#include <fstream>
+#include "codec_api.h"
+
+#include "utils/BufferedData.h"
+
+class BaseDecoderTest {  // Test helper: feeds a bitstream file to an ISVCDecoder and reports decoded pictures.
+ public:
+  struct Plane {  // one picture plane as passed to Callback::onDecodeFrame
+    const uint8_t* data;
+    int width;
+    int height;
+    int stride;
+  };
+
+  struct Frame {  // one decoded picture; u and v are filled with half the y width/height
+    Plane y;
+    Plane u;
+    Plane v;
+  };
+
+  struct Callback {  // receiver for decoded pictures
+    virtual void onDecodeFrame (const Frame& frame) = 0;  // called once per picture the decoder outputs
+  };
+
+  BaseDecoderTest();
+  int32_t SetUp();  // creates and initializes decoder_; returns the last result code (0 is expected)
+  void TearDown();  // uninitializes and destroys decoder_ if one was created
+  void DecodeFile (const char* fileName, Callback* cbk);  // decodes the whole file in one call
+
+  bool Open (const char* fileName);  // incremental API: open the file, then call DecodeNextFrame repeatedly
+  bool DecodeNextFrame (Callback* cbk);  // returns true while another call is needed
+  ISVCDecoder* decoder_;  // set by SetUp(); NULL until then
+
+ private:
+  void DecodeFrame (const uint8_t* src, size_t sliceSize, Callback* cbk);  // decode one chunk via DecodeFrame2
+  void FlushFrame (Callback* cbk);  // fetch one picture still held inside the decoder
+
+  std::ifstream file_;  // input used by Open()/DecodeNextFrame()
+  BufferedData buf_;  // chunk buffer reused across DecodeNextFrame() calls
+  enum {  // state of the incremental API
+    OpenFile,  // initial state: Open() not yet successful
+    Decoding,  // file open, chunks are being read and decoded
+    EndOfStream,  // file exhausted; the next DecodeNextFrame() signals end of stream
+    End  // nothing left to do
+  } decodeStatus_;
+};
+
+#endif //__BASEDECODERTEST_H__
--- a/test/api/BaseDecoderTest.cpp
+++ b/test/api/BaseDecoderTest.cpp
@@ -1,177 +1,219 @@
-#include <fstream>
-#include <gtest/gtest.h>
-#include "codec_def.h"
-#include "codec_app_def.h"
-#include "utils/BufferedData.h"
-#include "BaseDecoderTest.h"
-
-static void ReadFrame (std::ifstream* file, BufferedData* buf) {
-  // start code of a frame is {0, 0, 0, 1}
-  int zeroCount = 0;
-  char b;
-
-  buf->Clear();
-  for (;;) {
-    file->read (&b, 1);
-    if (file->gcount() != 1) { // end of file
-      return;
-    }
-    if (!buf->PushBack (b)) {
-      FAIL() << "unable to allocate memory";
-    }
-
-    if (buf->Length() <= 4) {
-      continue;
-    }
-
-    if (zeroCount < 3) {
-      zeroCount = b != 0 ? 0 : zeroCount + 1;
-    } else {
-      if (b == 1) {
-        if (file->seekg (-4, file->cur).good()) {
-          buf->SetLength (buf->Length() - 4);
-          return;
-        } else {
-          FAIL() << "unable to seek file";
-        }
-      } else if (b == 0) {
-        zeroCount = 3;
-      } else {
-        zeroCount = 0;
-      }
-    }
-  }
-}
-
-BaseDecoderTest::BaseDecoderTest()
-  : decoder_ (NULL), decodeStatus_ (OpenFile) {}
-
-int32_t BaseDecoderTest::SetUp() {
-  long rv = WelsCreateDecoder (&decoder_);
-  EXPECT_EQ (0, rv);
-  EXPECT_TRUE (decoder_ != NULL);
-  if (decoder_ == NULL) {
-    return rv;
-  }
-
-  SDecodingParam decParam;
-  memset (&decParam, 0, sizeof (SDecodingParam));
-  decParam.uiTargetDqLayer = UCHAR_MAX;
-  decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
-  decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
-
-  rv = decoder_->Initialize (&decParam);
-  EXPECT_EQ (0, rv);
-  return (int32_t)rv;
-}
-
-void BaseDecoderTest::TearDown() {
-  if (decoder_ != NULL) {
-    decoder_->Uninitialize();
-    WelsDestroyDecoder (decoder_);
-  }
-}
-
-
-void BaseDecoderTest::DecodeFrame (const uint8_t* src, size_t sliceSize, Callback* cbk) {
-  uint8_t* data[3];
-  SBufferInfo bufInfo;
-  memset (data, 0, sizeof (data));
-  memset (&bufInfo, 0, sizeof (SBufferInfo));
-
-  DECODING_STATE rv = decoder_->DecodeFrame2 (src, (int) sliceSize, data, &bufInfo);
-  ASSERT_TRUE (rv == dsErrorFree);
-
-  if (bufInfo.iBufferStatus == 1 && cbk != NULL) {
-    const Frame frame = {
-      {
-        // y plane
-        data[0],
-        bufInfo.UsrData.sSystemBuffer.iWidth,
-        bufInfo.UsrData.sSystemBuffer.iHeight,
-        bufInfo.UsrData.sSystemBuffer.iStride[0]
-      },
-      {
-        // u plane
-        data[1],
-        bufInfo.UsrData.sSystemBuffer.iWidth / 2,
-        bufInfo.UsrData.sSystemBuffer.iHeight / 2,
-        bufInfo.UsrData.sSystemBuffer.iStride[1]
-      },
-      {
-        // v plane
-        data[2],
-        bufInfo.UsrData.sSystemBuffer.iWidth / 2,
-        bufInfo.UsrData.sSystemBuffer.iHeight / 2,
-        bufInfo.UsrData.sSystemBuffer.iStride[1]
-      },
-    };
-    cbk->onDecodeFrame (frame);
-  }
-}
-void BaseDecoderTest::DecodeFile (const char* fileName, Callback* cbk) {
-  std::ifstream file (fileName, std::ios::in | std::ios::binary);
-  ASSERT_TRUE (file.is_open());
-
-  BufferedData buf;
-  while (true) {
-    ReadFrame (&file, &buf);
-    if (::testing::Test::HasFatalFailure()) {
-      return;
-    }
-    if (buf.Length() == 0) {
-      break;
-    }
-    DecodeFrame (buf.data(), buf.Length(), cbk);
-    if (::testing::Test::HasFatalFailure()) {
-      return;
-    }
-  }
-
-  int32_t iEndOfStreamFlag = 1;
-  decoder_->SetOption (DECODER_OPTION_END_OF_STREAM, &iEndOfStreamFlag);
-
-  // Get pending last frame
-  DecodeFrame (NULL, 0, cbk);
-}
-
-bool BaseDecoderTest::Open (const char* fileName) {
-  if (decodeStatus_ == OpenFile) {
-    file_.open (fileName, std::ios_base::out | std::ios_base::binary);
-    if (file_.is_open()) {
-      decodeStatus_ = Decoding;
-      return true;
-    }
-  }
-  return false;
-}
-
-bool BaseDecoderTest::DecodeNextFrame (Callback* cbk) {
-  switch (decodeStatus_) {
-  case Decoding:
-    ReadFrame (&file_, &buf_);
-    if (::testing::Test::HasFatalFailure()) {
-      return false;
-    }
-    if (buf_.Length() == 0) {
-      decodeStatus_ = EndOfStream;
-      return true;
-    }
-    DecodeFrame (buf_.data(), buf_.Length(), cbk);
-    if (::testing::Test::HasFatalFailure()) {
-      return false;
-    }
-    return true;
-  case EndOfStream: {
-    int32_t iEndOfStreamFlag = 1;
-    decoder_->SetOption (DECODER_OPTION_END_OF_STREAM, &iEndOfStreamFlag);
-    DecodeFrame (NULL, 0, cbk);
-    decodeStatus_ = End;
-    break;
-  }
-  case OpenFile:
-  case End:
-    break;
-  }
-  return false;
-}
+#include <fstream>
+#include <gtest/gtest.h>
+#include "codec_def.h"
+#include "codec_app_def.h"
+#include "utils/BufferedData.h"
+#include "BaseDecoderTest.h"
+
+static void ReadFrame (std::ifstream* file, BufferedData* buf) {
+  // Fills buf with bytes up to (not including) the next {0, 0, 0, 1} start code, or up to end of file.
+  int zeroCount = 0;  // consecutive zero bytes seen so far, saturating at 3
+  char b;
+
+  buf->Clear();
+  for (;;) {
+    file->read (&b, 1);
+    if (file->gcount() != 1) { // end of file
+      return;  // buf holds whatever was read; empty if nothing was left
+    }
+    if (!buf->PushBack (b)) {
+      FAIL() << "unable to allocate memory";
+    }
+
+    if (buf->Length() <= 4) {  // first 4 bytes are never scanned (presumably this chunk's own start code)
+      continue;
+    }
+
+    if (zeroCount < 3) {
+      zeroCount = b != 0 ? 0 : zeroCount + 1;  // extend or reset the run of zeros
+    } else {
+      if (b == 1) {  // at least three zeros followed by 1: the next start code
+        if (file->seekg (-4, file->cur).good()) {  // rewind so the next call starts at that start code
+          buf->SetLength (buf->Length() - 4);  // drop the 4 start-code bytes just appended
+          return;
+        } else {
+          FAIL() << "unable to seek file";
+        }
+      } else if (b == 0) {
+        zeroCount = 3;  // still inside a run of zeros
+      } else {
+        zeroCount = 0;
+      }
+    }
+  }
+}
+
+BaseDecoderTest::BaseDecoderTest()  // no decoder yet; the incremental API starts in the OpenFile state
+  : decoder_ (NULL), decodeStatus_ (OpenFile) {}
+
+int32_t BaseDecoderTest::SetUp() {  // creates decoder_ and initializes it; returns the last result code
+  long rv = WelsCreateDecoder (&decoder_);
+  EXPECT_EQ (0, rv);
+  EXPECT_TRUE (decoder_ != NULL);
+  if (decoder_ == NULL) {
+    return rv;  // nothing to initialize; hand back the creation result
+  }
+
+  SDecodingParam decParam;
+  memset (&decParam, 0, sizeof (SDecodingParam));  // every field not set below stays zero
+  decParam.uiTargetDqLayer = UCHAR_MAX;
+  decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
+  decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+
+  rv = decoder_->Initialize (&decParam);
+  EXPECT_EQ (0, rv);
+  return (int32_t)rv;
+}
+
+void BaseDecoderTest::TearDown() {  // releases the decoder made by SetUp(); harmless if there is none
+  if (decoder_ == NULL) { return; }
+  decoder_->Uninitialize();
+  WelsDestroyDecoder (decoder_);
+  decoder_ = NULL;  // public member: do not leave it dangling, and make a repeated TearDown() a no-op
+}
+
+
+void BaseDecoderTest::DecodeFrame (const uint8_t* src, size_t sliceSize, Callback* cbk) {  // decode one chunk
+  uint8_t* data[3];  // plane pointers filled in by the decoder
+  SBufferInfo bufInfo;
+  memset (data, 0, sizeof (data));
+  memset (&bufInfo, 0, sizeof (SBufferInfo));
+
+  DECODING_STATE rv = decoder_->DecodeFrame2 (src, (int) sliceSize, data, &bufInfo);
+  ASSERT_TRUE (rv == dsErrorFree);  // anything but dsErrorFree is a fatal test failure
+
+  if (bufInfo.iBufferStatus == 1 && cbk != NULL) {  // status 1 is treated as "data[] holds a picture"
+    const Frame frame = {
+      {
+        // y plane
+        data[0],
+        bufInfo.UsrData.sSystemBuffer.iWidth,
+        bufInfo.UsrData.sSystemBuffer.iHeight,
+        bufInfo.UsrData.sSystemBuffer.iStride[0]
+      },
+      {
+        // u plane
+        data[1],
+        bufInfo.UsrData.sSystemBuffer.iWidth / 2,
+        bufInfo.UsrData.sSystemBuffer.iHeight / 2,
+        bufInfo.UsrData.sSystemBuffer.iStride[1]
+      },
+      {
+        // v plane
+        data[2],
+        bufInfo.UsrData.sSystemBuffer.iWidth / 2,
+        bufInfo.UsrData.sSystemBuffer.iHeight / 2,
+        bufInfo.UsrData.sSystemBuffer.iStride[1]  // same stride entry as the u plane
+      },
+    };
+    cbk->onDecodeFrame (frame);
+  }
+}
+void BaseDecoderTest::FlushFrame (Callback* cbk) {  // like DecodeFrame, but calls FlushFrame with no input
+  uint8_t* data[3];  // plane pointers filled in by the decoder
+  SBufferInfo bufInfo;
+  memset (data, 0, sizeof (data));
+  memset (&bufInfo, 0, sizeof (SBufferInfo));
+
+  DECODING_STATE rv = decoder_->FlushFrame (data, &bufInfo);
+  ASSERT_TRUE (rv == dsErrorFree);  // anything but dsErrorFree is a fatal test failure
+
+  if (bufInfo.iBufferStatus == 1 && cbk != NULL) {  // status 1 is treated as "data[] holds a picture"
+    const Frame frame = {
+      {
+        // y plane
+        data[0],
+        bufInfo.UsrData.sSystemBuffer.iWidth,
+        bufInfo.UsrData.sSystemBuffer.iHeight,
+        bufInfo.UsrData.sSystemBuffer.iStride[0]
+      },
+      {
+        // u plane
+        data[1],
+        bufInfo.UsrData.sSystemBuffer.iWidth / 2,
+        bufInfo.UsrData.sSystemBuffer.iHeight / 2,
+        bufInfo.UsrData.sSystemBuffer.iStride[1]
+      },
+      {
+        // v plane
+        data[2],
+        bufInfo.UsrData.sSystemBuffer.iWidth / 2,
+        bufInfo.UsrData.sSystemBuffer.iHeight / 2,
+        bufInfo.UsrData.sSystemBuffer.iStride[1]  // same stride entry as the u plane
+      },
+    };
+    cbk->onDecodeFrame (frame);
+  }
+}
+// Decodes every chunk of fileName and reports each output picture through cbk (cbk may be NULL).
+void BaseDecoderTest::DecodeFile (const char* fileName, Callback* cbk) {
+  std::ifstream file (fileName, std::ios::in | std::ios::binary);
+  ASSERT_TRUE (file.is_open());
+
+  BufferedData buf;
+  while (true) {
+    ReadFrame (&file, &buf);
+    if (::testing::Test::HasFatalFailure()) {
+      return;
+    }
+    if (buf.Length() == 0) {
+      break;  // nothing was read: end of file
+    }
+    DecodeFrame (buf.data(), buf.Length(), cbk);
+    if (::testing::Test::HasFatalFailure()) {
+      return;
+    }
+  }
+
+  int32_t iEndOfStreamFlag = 1;
+  decoder_->SetOption (DECODER_OPTION_END_OF_STREAM, &iEndOfStreamFlag);
+  // Get pending last frame
+  DecodeFrame (NULL, 0, cbk);
+  // Flush out last frames in decoder buffer; like the loop above, stop at the first fatal failure
+  int32_t num_of_frames_in_buffer = 0;
+  decoder_->GetOption (DECODER_OPTION_NUM_OF_FRAMES_REMAINING_IN_BUFFER, &num_of_frames_in_buffer);
+  for (int32_t i = 0; i < num_of_frames_in_buffer && !::testing::Test::HasFatalFailure(); ++i) {
+    FlushFrame (cbk);
+  }
+}
+
+// Opens fileName for DecodeNextFrame(); succeeds only from the initial OpenFile state.
+bool BaseDecoderTest::Open (const char* fileName) {
+  if (decodeStatus_ != OpenFile) {
+    return false;
+  }
+  // Read-only: ifstream::open always adds ios_base::in, so asking for ios_base::out needs write access.
+  file_.open (fileName, std::ios_base::in | std::ios_base::binary);
+  decodeStatus_ = file_.is_open() ? Decoding : OpenFile;  // stay in OpenFile when the open failed
+  return decodeStatus_ == Decoding;
+}
+
+bool BaseDecoderTest::DecodeNextFrame (Callback* cbk) {  // one step of the Open()-based API; true means call again
+  switch (decodeStatus_) {
+  case Decoding:
+    ReadFrame (&file_, &buf_);
+    if (::testing::Test::HasFatalFailure()) {
+      return false;
+    }
+    if (buf_.Length() == 0) {  // nothing left in the file
+      decodeStatus_ = EndOfStream;
+      return true;  // the next call signals end of stream to the decoder
+    }
+    DecodeFrame (buf_.data(), buf_.Length(), cbk);
+    if (::testing::Test::HasFatalFailure()) {
+      return false;
+    }
+    return true;
+  case EndOfStream: {
+    int32_t iEndOfStreamFlag = 1;
+    decoder_->SetOption (DECODER_OPTION_END_OF_STREAM, &iEndOfStreamFlag);
+    DecodeFrame (NULL, 0, cbk);  // NOTE(review): unlike DecodeFile, no FlushFrame loop follows - confirm intended
+    decodeStatus_ = End;
+    break;
+  }
+  case OpenFile:  // Open() has not succeeded yet
+  case End:
+    break;
+  }
+  return false;
+}
--- a/test/api/cpp_interface_test.cpp
+++ b/test/api/cpp_interface_test.cpp
@@ -1,134 +1,138 @@
-#include <gtest/gtest.h>
-#include "codec_api.h"
-#include <stddef.h>
-
-static void CheckFunctionOrder (int expect, int actual, const char* name) {
-  EXPECT_EQ (expect, actual) << "Wrong function order: " << name;
-}
-
-typedef void (*CheckFunc) (int, int, const char*);
-extern "C" void CheckEncoderInterface (ISVCEncoder* p, CheckFunc);
-extern "C" void CheckDecoderInterface (ISVCDecoder* p, CheckFunc);
-extern "C" size_t GetBoolSize (void);
-extern "C" size_t GetBoolOffset (void);
-extern "C" size_t GetBoolStructSize (void);
-
-// Store the 'this' pointer to verify 'this' is received as expected from C code.
-static void* gThis;
-
-/**
- * Return a unique number for each virtual function so that we are able to
- * check if the order of functions in the virtual table is as expected.
- */
-struct SVCEncoderImpl : public ISVCEncoder {
-  virtual ~SVCEncoderImpl() {}
-  virtual int EXTAPI Initialize (const SEncParamBase* pParam) {
-    EXPECT_TRUE (gThis == this);
-    return 1;
-  }
-  virtual int EXTAPI InitializeExt (const SEncParamExt* pParam) {
-    EXPECT_TRUE (gThis == this);
-    return 2;
-  }
-  virtual int EXTAPI GetDefaultParams (SEncParamExt* pParam) {
-    EXPECT_TRUE (gThis == this);
-    return 3;
-  }
-  virtual int EXTAPI Uninitialize() {
-    EXPECT_TRUE (gThis == this);
-    return 4;
-  }
-  virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic,
-                                  SFrameBSInfo* pBsInfo) {
-    EXPECT_TRUE (gThis == this);
-    return 5;
-  }
-  virtual int EXTAPI EncodeParameterSets (SFrameBSInfo* pBsInfo) {
-    EXPECT_TRUE (gThis == this);
-    return 6;
-  }
-  virtual int EXTAPI ForceIntraFrame (bool bIDR,int iLayerId = -1) {
-    EXPECT_TRUE (gThis == this);
-    return 7;
-  }
-  virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) {
-    EXPECT_TRUE (gThis == this);
-    return 8;
-  }
-  virtual int EXTAPI GetOption (ENCODER_OPTION eOptionId, void* pOption) {
-    EXPECT_TRUE (gThis == this);
-    return 9;
-  }
-};
-
-struct SVCDecoderImpl : public ISVCDecoder {
-  virtual ~SVCDecoderImpl() {}
-  virtual long EXTAPI Initialize (const SDecodingParam* pParam) {
-    EXPECT_TRUE (gThis == this);
-    return 1;
-  }
-  virtual long EXTAPI Uninitialize() {
-    EXPECT_TRUE (gThis == this);
-    return 2;
-  }
-  virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* pSrc,
-      const int iSrcLen, unsigned char** ppDst, int* pStride,
-      int& iWidth, int& iHeight) {
-    EXPECT_TRUE (gThis == this);
-    return static_cast<DECODING_STATE> (3);
-  }
-  virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* pSrc,
-      const int iSrcLen, unsigned char** ppDst, SBufferInfo* pDstInfo) {
-    EXPECT_TRUE (gThis == this);
-    return static_cast<DECODING_STATE> (4);
-  }
-  virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* pSrc,
-      const int iSrcLen, unsigned char** ppDst, SBufferInfo* pDstInfo) {
-    EXPECT_TRUE (gThis == this);
-    return static_cast<DECODING_STATE> (5);
-  }
-  virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* pSrc,
-      const int iSrcLen, unsigned char* pDst, int iDstStride,
-      int& iDstLen, int& iWidth, int& iHeight, int& iColorFormat) {
-    EXPECT_TRUE (gThis == this);
-    return static_cast<DECODING_STATE> (6);
-  }
-  virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* pSrc,
-      const int iSrcLen, SParserBsInfo* pDstInfo) {
-    EXPECT_TRUE (gThis == this);
-    return static_cast<DECODING_STATE> (7);
-  }
-  virtual long EXTAPI SetOption (DECODER_OPTION eOptionId, void* pOption) {
-    EXPECT_TRUE (gThis == this);
-    return static_cast<DECODING_STATE> (8);
-  }
-  virtual long EXTAPI GetOption (DECODER_OPTION eOptionId, void* pOption) {
-    EXPECT_TRUE (gThis == this);
-    return static_cast<DECODING_STATE> (9);
-  }
-};
-
-TEST (ISVCEncoderTest, CheckFunctionOrder) {
-  SVCEncoderImpl* p = new SVCEncoderImpl;
-  gThis = p;
-  CheckEncoderInterface (p, CheckFunctionOrder);
-  delete p;
-}
-
-TEST (ISVCDecoderTest, CheckFunctionOrder) {
-  SVCDecoderImpl* p = new SVCDecoderImpl;
-  gThis = p;
-  CheckDecoderInterface (p, CheckFunctionOrder);
-  delete p;
-}
-
-struct bool_test_struct {
-  char c;
-  bool b;
-};
-
-TEST (ISVCDecoderEncoderTest, CheckCAbi) {
-  EXPECT_EQ (sizeof (bool), GetBoolSize()) << "Wrong size of bool type";
-  EXPECT_EQ (offsetof (bool_test_struct, b), GetBoolOffset()) << "Wrong alignment of bool in a struct";
-  EXPECT_EQ (sizeof (bool_test_struct), GetBoolStructSize()) << "Wrong size of struct with a bool";
-}
+#include <gtest/gtest.h>
+#include "codec_api.h"
+#include <stddef.h>
+
+static void CheckFunctionOrder (int expect, int actual, const char* name) { // passed as CheckFunc to the extern "C" checkers
+  EXPECT_EQ (expect, actual) << "Wrong function order: " << name;
+}
+
+typedef void (*CheckFunc) (int, int, const char*);
+extern "C" void CheckEncoderInterface (ISVCEncoder* p, CheckFunc);
+extern "C" void CheckDecoderInterface (ISVCDecoder* p, CheckFunc);
+extern "C" size_t GetBoolSize (void);
+extern "C" size_t GetBoolOffset (void);
+extern "C" size_t GetBoolStructSize (void);
+
+// Store the 'this' pointer to verify 'this' is received as expected from C code.
+static void* gThis;
+
+/**
+ * Stub encoder: each virtual function verifies gThis == this and returns a
+ * unique number, so the order of functions in the virtual table can be checked.
+ */
+struct SVCEncoderImpl : public ISVCEncoder {
+  virtual ~SVCEncoderImpl() {}
+  virtual int EXTAPI Initialize (const SEncParamBase* pParam) {
+    EXPECT_TRUE (gThis == this);
+    return 1;
+  }
+  virtual int EXTAPI InitializeExt (const SEncParamExt* pParam) {
+    EXPECT_TRUE (gThis == this);
+    return 2;
+  }
+  virtual int EXTAPI GetDefaultParams (SEncParamExt* pParam) {
+    EXPECT_TRUE (gThis == this);
+    return 3;
+  }
+  virtual int EXTAPI Uninitialize() {
+    EXPECT_TRUE (gThis == this);
+    return 4;
+  }
+  virtual int EXTAPI EncodeFrame (const SSourcePicture* kpSrcPic,
+                                  SFrameBSInfo* pBsInfo) {
+    EXPECT_TRUE (gThis == this);
+    return 5;
+  }
+  virtual int EXTAPI EncodeParameterSets (SFrameBSInfo* pBsInfo) {
+    EXPECT_TRUE (gThis == this);
+    return 6;
+  }
+  virtual int EXTAPI ForceIntraFrame (bool bIDR, int iLayerId = -1) {
+    EXPECT_TRUE (gThis == this);
+    return 7;
+  }
+  virtual int EXTAPI SetOption (ENCODER_OPTION eOptionId, void* pOption) {
+    EXPECT_TRUE (gThis == this);
+    return 8;
+  }
+  virtual int EXTAPI GetOption (ENCODER_OPTION eOptionId, void* pOption) {
+    EXPECT_TRUE (gThis == this);
+    return 9; // last of the nine ids handed out by this stub
+  }
+};
+
+// Stub decoder: every virtual function checks gThis == this and returns a
+// number unique to it, so a call that lands in the wrong vtable slot shows up.
+struct SVCDecoderImpl : public ISVCDecoder {
+  virtual ~SVCDecoderImpl() {}
+  virtual long EXTAPI Initialize (const SDecodingParam* pParam) {
+    EXPECT_TRUE (gThis == this);
+    return 1;
+  }
+  virtual long EXTAPI Uninitialize() {
+    EXPECT_TRUE (gThis == this);
+    return 2;
+  }
+  virtual DECODING_STATE EXTAPI DecodeFrame (const unsigned char* pSrc, const int iSrcLen,
+      unsigned char** ppDst, int* pStride, int& iWidth, int& iHeight) {
+    EXPECT_TRUE (gThis == this);
+    return static_cast<DECODING_STATE> (3);
+  }
+  virtual DECODING_STATE EXTAPI DecodeFrameNoDelay (const unsigned char* pSrc,
+      const int iSrcLen, unsigned char** ppDst, SBufferInfo* pDstInfo) {
+    EXPECT_TRUE (gThis == this);
+    return static_cast<DECODING_STATE> (4);
+  }
+  virtual DECODING_STATE EXTAPI DecodeFrame2 (const unsigned char* pSrc,
+      const int iSrcLen, unsigned char** ppDst, SBufferInfo* pDstInfo) {
+    EXPECT_TRUE (gThis == this);
+    return static_cast<DECODING_STATE> (5);
+  }
+  virtual DECODING_STATE EXTAPI FlushFrame (unsigned char** ppDst, SBufferInfo* pDstInfo) {
+    EXPECT_TRUE (gThis == this);
+    return static_cast<DECODING_STATE> (10); // 1-9 are taken; must not repeat DecodeFrame2's 5
+  }
+  virtual DECODING_STATE EXTAPI DecodeFrameEx (const unsigned char* pSrc, const int iSrcLen,
+      unsigned char* pDst, int iDstStride, int& iDstLen, int& iWidth, int& iHeight, int& iColorFormat) {
+    EXPECT_TRUE (gThis == this);
+    return static_cast<DECODING_STATE> (6);
+  }
+  virtual DECODING_STATE EXTAPI DecodeParser (const unsigned char* pSrc,
+      const int iSrcLen, SParserBsInfo* pDstInfo) {
+    EXPECT_TRUE (gThis == this);
+    return static_cast<DECODING_STATE> (7);
+  }
+  virtual long EXTAPI SetOption (DECODER_OPTION eOptionId, void* pOption) {
+    EXPECT_TRUE (gThis == this);
+    return 8;
+  }
+  virtual long EXTAPI GetOption (DECODER_OPTION eOptionId, void* pOption) {
+    EXPECT_TRUE (gThis == this);
+    return 9;
+  }
+};
+
+TEST (ISVCEncoderTest, CheckFunctionOrder) {
+  // gThis records the stub's address so each stub method can verify 'this'.
+  SVCEncoderImpl encoderStub;
+  gThis = &encoderStub;
+  CheckEncoderInterface (&encoderStub, CheckFunctionOrder);
+}
+
+TEST (ISVCDecoderTest, CheckFunctionOrder) {
+  // gThis records the stub's address so each stub method can verify 'this'.
+  SVCDecoderImpl decoderStub;
+  gThis = &decoderStub;
+  CheckDecoderInterface (&decoderStub, CheckFunctionOrder);
+}
+
+struct bool_test_struct { // layout probe: CheckCAbi compares its size and the offset of b
+  char c;
+  bool b;
+};
+
+TEST (ISVCDecoderEncoderTest, CheckCAbi) { // C++ bool size/offset/struct size vs. the extern "C" getters
+  EXPECT_EQ (sizeof (bool), GetBoolSize()) << "Wrong size of bool type";
+  EXPECT_EQ (offsetof (bool_test_struct, b), GetBoolOffset()) << "Wrong alignment of bool in a struct";
+  EXPECT_EQ (sizeof (bool_test_struct), GetBoolStructSize()) << "Wrong size of struct with a bool";
+}
--- a/test/api/decode_api_test.cpp
+++ b/test/api/decode_api_test.cpp
@@ -1,1276 +1,1276 @@
-#include <gtest/gtest.h>
-#include "codec_def.h"
-#include "utils/BufferedData.h"
-#include "utils/FileInputStream.h"
-#include "BaseDecoderTest.h"
-#include "BaseEncoderTest.h"
-#include "wels_common_defs.h"
-#include "utils/HashFunctions.h"
-#include <string>
-#include <vector>
-#include "encode_decode_api_test.h"
-using namespace WelsCommon;
-
-static void TestOutPutTrace (void* ctx, int level, const char* string) {
-  STraceUnit* pTraceUnit = (STraceUnit*) ctx;
-  EXPECT_LE (level, pTraceUnit->iTarLevel);
-}
-
-TEST_P (EncodeDecodeTestAPI, DecoderVclNal) {
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum, p.width, p.height, p.frameRate, &param_);
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-
-  int iIdx = 0;
-  while (iIdx <= p.numframes) {
-
-    EncodeOneFrame (0);
-
-    //decoding after each encoding frame
-    int vclNal, len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    ASSERT_TRUE (rv == cmResultSuccess);
-    rv = decoder_->GetOption (DECODER_OPTION_VCL_NAL, &vclNal);
-    EXPECT_EQ (vclNal, FEEDBACK_UNKNOWN_NAL); //no reconstruction, unknown return
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    ASSERT_TRUE (rv == cmResultSuccess);
-    rv = decoder_->GetOption (DECODER_OPTION_VCL_NAL, &vclNal);
-    EXPECT_EQ (vclNal, FEEDBACK_VCL_NAL);
-    iIdx++;
-  } //while
-  //ignore last frame
-}
-
-TEST_P (EncodeDecodeTestAPI, GetOptionFramenum) {
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-
-  int32_t iEncFrameNum = -1;
-  int32_t iDecFrameNum;
-  int iIdx = 0;
-  while (iIdx <= p.numframes) {
-    EncodeOneFrame (0);
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    ASSERT_TRUE (rv == cmResultSuccess);
-    decoder_->GetOption (DECODER_OPTION_FRAME_NUM, &iDecFrameNum);
-    EXPECT_EQ (iDecFrameNum, -1);
-    iEncFrameNum++;
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    ASSERT_TRUE (rv == cmResultSuccess);
-    decoder_->GetOption (DECODER_OPTION_FRAME_NUM, &iDecFrameNum);
-    EXPECT_EQ (iEncFrameNum, iDecFrameNum);
-    iIdx++;
-  } //while
-  //ignore last frame
-}
-
-TEST_P (EncodeDecodeTestAPI, GetOptionIDR) {
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-
-  //init for encoder
-  // I420: 1(Y) + 1/4(U) + 1/4(V)
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-
-  int32_t iEncCurIdrPicId = 0;
-  int32_t iDecCurIdrPicId;
-  int32_t iIDRPeriod = 1;
-  int32_t iSpsPpsIdAddition = 0;
-  int iIdx = 0;
-  while (iIdx <= p.numframes) {
-    iSpsPpsIdAddition = rand() %
-                        2; //the current strategy supports more than 2 modes, but the switch between the modes>2 is not allowed
-    iIDRPeriod = (rand() % 150) + 1;
-    encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
-    encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
-
-    EncodeOneFrame (0);
-
-    if (info.eFrameType == videoFrameTypeIDR) {
-      iEncCurIdrPicId = iEncCurIdrPicId + 1;
-    }
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    ASSERT_TRUE (rv == cmResultSuccess);
-    decoder_->GetOption (DECODER_OPTION_IDR_PIC_ID, &iDecCurIdrPicId);
-    EXPECT_EQ (iDecCurIdrPicId, iEncCurIdrPicId);
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    ASSERT_TRUE (rv == cmResultSuccess);
-    decoder_->GetOption (DECODER_OPTION_IDR_PIC_ID, &iDecCurIdrPicId);
-    EXPECT_EQ (iDecCurIdrPicId, iEncCurIdrPicId);
-    iIdx++;
-  } //while
-  //ignore last frame
-}
-
-TEST_P (EncodeDecodeTestAPI, InOutTimeStamp) {
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  int32_t iSpsPpsIdAddition = 1;
-  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 60;
-  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
-  SLTRConfig sLtrConfigVal;
-  sLtrConfigVal.bEnableLongTermReference = 1;
-  sLtrConfigVal.iLTRRefNum = 1;
-  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
-  int32_t iLtrPeriod = 2;
-  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
-  int iIdx = 0;
-  int iSkipedBytes;
-  unsigned long long uiEncTimeStamp = 100;
-  while (iIdx <= p.numframes) {
-    EncodeOneFrame (1);
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    uint32_t uiEcIdc = ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE;
-    decoder_->SetOption (DECODER_OPTION_ERROR_CON_IDC, &uiEcIdc);
-    dstBufInfo_.uiInBsTimeStamp = uiEncTimeStamp;
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    dstBufInfo_.uiInBsTimeStamp = uiEncTimeStamp;
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    if (dstBufInfo_.iBufferStatus == 1) {
-      EXPECT_EQ (uiEncTimeStamp, dstBufInfo_.uiOutYuvTimeStamp);
-    }
-    iIdx++;
-    uiEncTimeStamp++;
-  }
-  (void) iSkipedBytes;
-}
-
-TEST_P(EncodeDecodeTestAPI, GetOptionIsRefPic) {
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault(1, p.slicenum, p.width, p.height, p.frameRate, &param_);
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt(&param_);
-  ASSERT_TRUE(rv == cmResultSuccess);
-
-  ASSERT_TRUE(InitialEncDec(p.width, p.height));
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption(ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption(DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  int iIdx = 0;
-  int iSkipedBytes;
-  int iIsRefPic;
-  decoder_->GetOption(DECODER_OPTION_IS_REF_PIC, &iIsRefPic);
-  ASSERT_EQ(iIsRefPic, -1);
-
-  while (iIdx <= p.numframes) {
-    EncodeOneFrame(1);
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData(info, len);
-    unsigned char* pData[3] = { NULL };
-    memset(&dstBufInfo_, 0, sizeof(SBufferInfo));
-    rv = decoder_->DecodeFrame2(info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    memset(&dstBufInfo_, 0, sizeof(SBufferInfo));
-    decoder_->GetOption(DECODER_OPTION_IS_REF_PIC, &iIsRefPic);
-    ASSERT_EQ(iIsRefPic, -1);
-    rv = decoder_->DecodeFrame2(NULL, 0, pData, &dstBufInfo_); //reconstruction
-    if (dstBufInfo_.iBufferStatus == 1) {
-      decoder_->GetOption(DECODER_OPTION_IS_REF_PIC, &iIsRefPic);
-      ASSERT_TRUE(iIsRefPic >= 0);
-    }
-    iIdx++;
-  }
-  (void)iSkipedBytes;
-}
-
-TEST_P (EncodeDecodeTestAPI, GetOptionTid_AVC_NOPREFIX) {
-  SLTRMarkingFeedback m_LTR_Marking_Feedback;
-  SLTRRecoverRequest m_LTR_Recover_Request;
-  m_LTR_Recover_Request.uiIDRPicId = 0;
-  m_LTR_Recover_Request.iLayerId = 0;
-  m_LTR_Marking_Feedback.iLayerId = 0;
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  param_.bPrefixNalAddingCtrl = false;
-  param_.iTemporalLayerNum = (rand() % 4) + 1;
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  int32_t iSpsPpsIdAddition = 1;
-  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 60;
-  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
-  SLTRConfig sLtrConfigVal;
-  sLtrConfigVal.bEnableLongTermReference = 1;
-  sLtrConfigVal.iLTRRefNum = 1;
-  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
-  int32_t iLtrPeriod = 2;
-  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
-  int iIdx = 0;
-  int iLossIdx = 0;
-  bool bVCLLoss = false;
-  while (iIdx <= p.numframes) {
-    EncodeOneFrame (1);
-    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) {
-      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
-    }
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    SimulateNALLoss (info.sLayerInfo[0].pBsBuf, len, &m_SLostSim, p.pLossSequence, p.bLostPara, iLossIdx, bVCLLoss);
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    int iTid = -1;
-    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
-    if (iTid != -1) {
-      ASSERT_EQ (iTid, 0);
-    }
-    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
-    std::vector<SLostSim>::iterator iter = m_SLostSim.begin();
-    bool bHasVCL = false;
-    for (unsigned int k = 0; k < m_SLostSim.size(); k++) {
-      if (IS_VCL_NAL (iter->eNalType, 0) && iter->isLost == false) {
-        bHasVCL = true;
-        break;
-      }
-      iter++;
-    }
-    (void) bHasVCL;
-    if (iTid != -1) {
-      ASSERT_EQ (iTid, 0);
-    }
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
-    iIdx++;
-  }
-}
-
-TEST_P (EncodeDecodeTestAPI, GetOptionTid_AVC_WITH_PREFIX_NOLOSS) {
-  SLTRMarkingFeedback m_LTR_Marking_Feedback;
-  SLTRRecoverRequest m_LTR_Recover_Request;
-  m_LTR_Recover_Request.uiIDRPicId = 0;
-  m_LTR_Recover_Request.iLayerId = 0;
-  m_LTR_Marking_Feedback.iLayerId = 0;
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  param_.bPrefixNalAddingCtrl = true;
-  param_.iTemporalLayerNum = (rand() % 4) + 1;
-  param_.iSpatialLayerNum = 1;
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  int32_t iSpsPpsIdAddition = 1;
-  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 60;
-  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
-  SLTRConfig sLtrConfigVal;
-  sLtrConfigVal.bEnableLongTermReference = 1;
-  sLtrConfigVal.iLTRRefNum = 1;
-  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
-  int32_t iLtrPeriod = 2;
-  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
-  int iIdx = 0;
-  while (iIdx <= p.numframes) {
-    EncodeOneFrame (1);
-    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) {
-      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
-    }
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    ExtractDidNal (&info, len, &m_SLostSim, 0);
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    int iTid = -1;
-    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
-    ASSERT_EQ (iTid, -1);
-    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
-    ASSERT_EQ (iTid, info.sLayerInfo[0].uiTemporalId);
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
-    iIdx++;
-  }
-}
-
-TEST_P (EncodeDecodeTestAPI, GetOptionTid_SVC_L1_NOLOSS) {
-  SLTRMarkingFeedback m_LTR_Marking_Feedback;
-  SLTRRecoverRequest m_LTR_Recover_Request;
-  m_LTR_Recover_Request.uiIDRPicId = 0;
-  m_LTR_Recover_Request.iLayerId = 0;
-  m_LTR_Marking_Feedback.iLayerId = 0;
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (2, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  param_.iTemporalLayerNum = (rand() % 4) + 1;
-  param_.iSpatialLayerNum = 2;
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  int32_t iSpsPpsIdAddition = 1;
-  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 60;
-  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
-  SLTRConfig sLtrConfigVal;
-  sLtrConfigVal.bEnableLongTermReference = 1;
-  sLtrConfigVal.iLTRRefNum = 1;
-  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
-  int32_t iLtrPeriod = 2;
-  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
-  int iIdx = 0;
-  while (iIdx <= p.numframes) {
-    EncodeOneFrame (1);
-    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) {
-      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
-    }
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    ExtractDidNal (&info, len, &m_SLostSim, 1);
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    int iTid = -1;
-    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
-    ASSERT_EQ (iTid, -1);
-    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
-    ASSERT_EQ (iTid, info.sLayerInfo[0].uiTemporalId);
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
-    iIdx++;
-  }
-}
-
-
-
-TEST_P (EncodeDecodeTestAPI, SetOption_Trace) {
-  SLTRMarkingFeedback m_LTR_Marking_Feedback;
-  SLTRRecoverRequest m_LTR_Recover_Request;
-  m_LTR_Recover_Request.uiIDRPicId = 0;
-  m_LTR_Recover_Request.iLayerId = 0;
-  m_LTR_Marking_Feedback.iLayerId = 0;
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  param_.iSpatialLayerNum = 1;
-
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  pFunc = TestOutPutTrace;
-  pTraceInfo = &sTrace;
-  sTrace.iTarLevel = iTraceLevel;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK, &pFunc);
-  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
-  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK, &pFunc);
-  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
-
-
-  int32_t iSpsPpsIdAddition = 1;
-  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 60;
-  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
-  SLTRConfig sLtrConfigVal;
-  sLtrConfigVal.bEnableLongTermReference = 1;
-  sLtrConfigVal.iLTRRefNum = 1;
-  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
-  int32_t iLtrPeriod = 2;
-  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
-  int iIdx = 0;
-  int iLossIdx = 0;
-  bool bVCLLoss = false;
-  while (iIdx <= p.numframes) {
-    iTraceLevel = rand() % 33;
-    sTrace.iTarLevel = iTraceLevel;
-    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    EncodeOneFrame (1);
-    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) {
-      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
-    }
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    ExtractDidNal (&info, len, &m_SLostSim, 0);
-    SimulateNALLoss (info.sLayerInfo[0].pBsBuf, len, &m_SLostSim, p.pLossSequence, p.bLostPara, iLossIdx, bVCLLoss);
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
-    iIdx++;
-  }
-}
-
-TEST_P (EncodeDecodeTestAPI, SetOption_Trace_NULL) {
-  SLTRMarkingFeedback m_LTR_Marking_Feedback;
-  SLTRRecoverRequest m_LTR_Recover_Request;
-  m_LTR_Recover_Request.uiIDRPicId = 0;
-  m_LTR_Recover_Request.iLayerId = 0;
-  m_LTR_Marking_Feedback.iLayerId = 0;
-  EncodeDecodeFileParamBase p = GetParam();
-  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
-  param_.iSpatialLayerNum = 1;
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == cmResultSuccess);
-  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-
-  ASSERT_TRUE (InitialEncDec (p.width, p.height));
-
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  pFunc = NULL;
-  pTraceInfo = NULL;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK, &pFunc);
-  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
-  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK, &pFunc);
-  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-
-  int32_t iSpsPpsIdAddition = 1;
-  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
-  int32_t iIDRPeriod = 60;
-  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
-  SLTRConfig sLtrConfigVal;
-  sLtrConfigVal.bEnableLongTermReference = 1;
-  sLtrConfigVal.iLTRRefNum = 1;
-  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
-  int32_t iLtrPeriod = 2;
-  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
-  int iIdx = 0;
-  int iLossIdx = 0;
-  bool bVCLLoss = false;
-  while (iIdx <= p.numframes) {
-    iTraceLevel = rand() % 33;
-    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    EncodeOneFrame (1);
-    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) {
-      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
-    }
-    //decoding after each encoding frame
-    int len = 0;
-    encToDecData (info, len);
-    unsigned char* pData[3] = { NULL };
-    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-    ExtractDidNal (&info, len, &m_SLostSim, 0);
-    SimulateNALLoss (info.sLayerInfo[0].pBsBuf, len, &m_SLostSim, p.pLossSequence, p.bLostPara, iLossIdx, bVCLLoss);
-    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
-    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
-    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
-    iIdx++;
-  }
-}
-
-
-
-
-class DecodeCrashTestAPI : public ::testing::TestWithParam<EncodeDecodeFileParamBase>, public EncodeDecodeTestBase {
- public:
-  void SetUp() {
-    EncodeDecodeTestBase::SetUp();
-    ucBuf_ = NULL;
-    ucBuf_ = new unsigned char [1000000];
-    ASSERT_TRUE (ucBuf_ != NULL);
-  }
-
-  void TearDown() {
-    EncodeDecodeTestBase::TearDown();
-    if (NULL != ucBuf_) {
-      delete[] ucBuf_;
-      ucBuf_ = NULL;
-    }
-    ASSERT_TRUE (ucBuf_ == NULL);
-  }
-
-  void prepareParam (int iLayerNum, int iSliceNum, int width, int height, float framerate, SEncParamExt* pParam) {
-    memset (pParam, 0, sizeof (SEncParamExt));
-    EncodeDecodeTestBase::prepareParam (iLayerNum, iSliceNum,  width, height, framerate, pParam);
-  }
-
-  void EncodeOneFrame() {
-    int frameSize = EncPic.iPicWidth * EncPic.iPicHeight * 3 / 2;
-    memset (buf_.data(), iRandValue, (frameSize >> 2));
-    memset (buf_.data() + (frameSize >> 2), rand() % 256, (frameSize - (frameSize >> 2)));
-    int rv = encoder_->EncodeFrame (&EncPic, &info);
-    ASSERT_TRUE (rv == cmResultSuccess || rv == cmUnknownReason);
-  }
- protected:
-  unsigned char* ucBuf_;
-};
-
-struct EncodeDecodeParamBase {
-  int width;
-  int height;
-  float frameRate;
-  int iTarBitrate;
-};
-
-#define NUM_OF_POSSIBLE_RESOLUTION (9)
-static const EncodeDecodeParamBase kParamArray[] = {
-  {160, 90, 6.0f, 250000},
-  {90, 160, 6.0f, 250000},
-  {320, 180, 12.0f, 500000},
-  {180, 320, 12.0f, 500000},
-  {480, 270, 12.0f, 600000},
-  {270, 480, 12.0f, 600000},
-  {640, 360, 24.0f, 800000},
-  {360, 640, 24.0f, 800000},
-  {1280, 720, 24.0f, 1000000},
-};
-
-//#define DEBUG_FILE_SAVE_CRA
-TEST_F (DecodeCrashTestAPI, DecoderCrashTest) {
-  uint32_t uiGet;
-  encoder_->Uninitialize();
-
-  //do tests until crash
-  unsigned int uiLoopRound = 0;
-  unsigned char* pucBuf = ucBuf_;
-  int iDecAuSize;
-#ifdef DEBUG_FILE_SAVE_CRA
-  //open file to save tested BS
-  FILE* fDataFile = fopen ("test_crash.264", "wb");
-  FILE* fLenFile = fopen ("test_crash_len.log", "w");
-  int iFileSize = 0;
-#endif
-
-  //set eCurStrategy for one test
-  EParameterSetStrategy eCurStrategy = CONSTANT_ID;
-  switch (rand() % 7) {
-  case 1:
-    eCurStrategy = INCREASING_ID;
-    break;
-  case 2:
-    eCurStrategy = SPS_LISTING;
-    break;
-  case 3:
-    eCurStrategy = SPS_LISTING_AND_PPS_INCREASING;
-    break;
-  case 6:
-    eCurStrategy = SPS_PPS_LISTING;
-    break;
-  default:
-    //using the initial value
-    break;
-  }
-
-  do {
-    int iTotalFrameNum = (rand() % 100) + 1;
-    int iSeed = rand() % NUM_OF_POSSIBLE_RESOLUTION;
-    EncodeDecodeParamBase p = kParamArray[iSeed];
-#ifdef DEBUG_FILE_SAVE_CRA
-    printf ("using param set %d in loop %d\n", iSeed, uiLoopRound);
-#endif
-    //Initialize Encoder
-    prepareParam (1, 1, p.width, p.height, p.frameRate, &param_);
-    param_.iRCMode = RC_TIMESTAMP_MODE;
-    param_.iTargetBitrate = p.iTarBitrate;
-    param_.uiIntraPeriod = 0;
-    param_.eSpsPpsIdStrategy = eCurStrategy;
-    param_.bEnableBackgroundDetection = true;
-    param_.bEnableSceneChangeDetect = (rand() % 3) ? true : false;
-    param_.bPrefixNalAddingCtrl = (rand() % 2) ? true : false;
-    param_.iEntropyCodingModeFlag = 0;
-    param_.bEnableFrameSkip = true;
-    param_.iMultipleThreadIdc = 0;
-    param_.sSpatialLayers[0].iSpatialBitrate = p.iTarBitrate;
-    param_.sSpatialLayers[0].iMaxSpatialBitrate = p.iTarBitrate << 1;
-    param_.sSpatialLayers[0].sSliceArgument.uiSliceMode = (rand() % 2) ? SM_SIZELIMITED_SLICE : SM_SINGLE_SLICE;
-    if (param_.sSpatialLayers[0].sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE) {
-      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 1400;
-      param_.uiMaxNalSize = 1400;
-    } else {
-      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 0;
-      param_.uiMaxNalSize = 0;
-    }
-
-    int rv = encoder_->InitializeExt (&param_);
-    ASSERT_TRUE (rv == cmResultSuccess);
-    decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
-    EXPECT_EQ (uiGet, (uint32_t) ERROR_CON_SLICE_COPY); //default value should be ERROR_CON_SLICE_COPY
-    int32_t iTraceLevel = WELS_LOG_QUIET;
-    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-
-    //Start for enc/dec
-    int iIdx = 0;
-    unsigned char* pData[3] = { NULL };
-
-    EncodeDecodeFileParamBase pInput; //to conform with old functions
-    pInput.width =  p.width;
-    pInput.height = p.height;
-    pInput.frameRate = p.frameRate;
-    ASSERT_TRUE (prepareEncDecParam (pInput));
-    while (iIdx++ < iTotalFrameNum) { // loop in frame
-      EncodeOneFrame();
-#ifdef DEBUG_FILE_SAVE_CRA
-      //reset file if file size large
-      if ((info.eFrameType == videoFrameTypeIDR) && (iFileSize >= (1 << 25))) {
-        fclose (fDataFile);
-        fDataFile = fopen ("test_crash.264", "wb");
-        iFileSize = 0;
-        decoder_->Uninitialize();
-
-        SDecodingParam decParam;
-        memset (&decParam, 0, sizeof (SDecodingParam));
-        decParam.uiTargetDqLayer = UCHAR_MAX;
-        decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
-        decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
-
-        rv = decoder_->Initialize (&decParam);
-        ASSERT_EQ (0, rv);
-      }
-#endif
-      if (info.eFrameType == videoFrameTypeSkip)
-        continue;
-      //deal with packets
-      unsigned char* pBsBuf;
-      iDecAuSize = 0;
-      pucBuf = ucBuf_; //init buf start pos for decoder usage
-      for (int iLayerNum = 0; iLayerNum < info.iLayerNum; iLayerNum++) {
-        SLayerBSInfo* pLayerBsInfo = &info.sLayerInfo[iLayerNum];
-        pBsBuf = info.sLayerInfo[iLayerNum].pBsBuf;
-        int iTotalNalCnt = pLayerBsInfo->iNalCount;
-        for (int iNalCnt = 0; iNalCnt < iTotalNalCnt; iNalCnt++) {  //loop in NAL
-          int iPacketSize = pLayerBsInfo->pNalLengthInByte[iNalCnt];
-          //packet loss
-          int iLossRateRange = (uiLoopRound % 100) + 1; //1-100
-          int iLossRate = (rand() % iLossRateRange);
-          bool bPacketLost = (rand() % 101) > (100 -
-                                               iLossRate);   // [0, (100-iLossRate)] indicates NO LOSS, (100-iLossRate, 100] indicates LOSS
-          if (!bPacketLost) { //no loss
-            memcpy (pucBuf, pBsBuf, iPacketSize);
-            pucBuf += iPacketSize;
-            iDecAuSize += iPacketSize;
-          }
-#ifdef DEBUG_FILE_SAVE_CRA
-          else {
-            printf ("lost packet size=%d at frame-type=%d at loss rate %d (%d)\n", iPacketSize, info.eFrameType, iLossRate,
-                    iLossRateRange);
-          }
-#endif
-          //update bs info
-          pBsBuf += iPacketSize;
-        } //nal
-      } //layer
-
-#ifdef DEBUG_FILE_SAVE_CRA
-      //save to file
-      fwrite (ucBuf_, 1, iDecAuSize, fDataFile);
-      fflush (fDataFile);
-      iFileSize += iDecAuSize;
-
-      //save to len file
-      unsigned long ulTmp[4];
-      ulTmp[0] = ulTmp[1] = ulTmp[2] = iIdx;
-      ulTmp[3] = iDecAuSize;
-      fwrite (ulTmp, sizeof (unsigned long), 4, fLenFile); // index, timeStamp, data size
-      fflush (fLenFile);
-#endif
-
-      //decode
-      pData[0] = pData[1] = pData[2] = 0;
-      memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
-
-      rv = decoder_->DecodeFrame2 (ucBuf_, iDecAuSize, pData, &dstBufInfo_);
-      rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
-      //guarantee decoder EC status
-      decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
-      EXPECT_EQ (uiGet, (uint32_t) ERROR_CON_SLICE_COPY);
-    } //frame
-    uiLoopRound ++;
-    if (uiLoopRound >= (1 << 30))
-      uiLoopRound = 0;
-#ifdef DEBUG_FILE_SAVE_CRA
-    if (uiLoopRound % 100 == 0)
-      printf ("run %d times.\n", uiLoopRound);
-  } while (1); //while (iLoopRound<100);
-  fclose (fDataFile);
-  fclose (fLenFile);
-#else
-  }
-  while (uiLoopRound < 10);
-#endif
-
-}
-
-const uint32_t kiTotalLayer = 3; //DO NOT CHANGE!
-const uint32_t kiSliceNum = 2; //DO NOT CHANGE!
-const uint32_t kiWidth = 160; //DO NOT CHANGE!
-const uint32_t kiHeight = 96; //DO NOT CHANGE!
-const uint32_t kiFrameRate = 12; //DO NOT CHANGE!
-const uint32_t kiFrameNum = 100; //DO NOT CHANGE!
-const char* const pHashStr[][2] = { //DO NOT CHANGE!
-  // Allow for different output depending on whether averaging is done
-  // vertically or horizontally first when downsampling.
-  { "caaaa3352ab8614e3a35836f5d7c9a528294e953", "326cc236e9ba5277aedc5cf0865dd4cbd2f89fe0" },
-  { "2dc97661e94515d9947a344127062f82814afc2a", "72f36bb33d190979be88077c6166a09767dd2992" },
-  { "106ec96a90412aabea5c0cfa6bfc654a0b5db33e", "998c2947bccf140bde1e43e29376614038eb7c71" }
-};
-
-class DecodeParseAPI : public ::testing::TestWithParam<EncodeDecodeFileParamBase>, public EncodeDecodeTestBase {
- public:
-  DecodeParseAPI() {
-    memset (&BsInfo_, 0, sizeof (SParserBsInfo));
-    fYuv_ = NULL;
-    iWidth_ = 0;
-    iHeight_ = 0;
-    memset (&ctx_, 0, sizeof (SHA1Context));
-  }
-  void SetUp() {
-    SHA1Reset (&ctx_);
-    EncodeDecodeTestBase::SetUp();
-
-    if (decoder_)
-      decoder_->Uninitialize();
-    SDecodingParam decParam;
-    memset (&decParam, 0, sizeof (SDecodingParam));
-    decParam.uiTargetDqLayer = UCHAR_MAX;
-    decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
-    decParam.bParseOnly = true;
-    decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
-
-    int rv = decoder_->Initialize (&decParam);
-    ASSERT_EQ (0, rv);
-    memset (&BsInfo_, 0, sizeof (SParserBsInfo));
-    const char* sFileName = "res/CiscoVT2people_160x96_6fps.yuv";
-#if defined(ANDROID_NDK)
-    std::string filename = std::string ("/sdcard/") + sFileName;
-    ASSERT_TRUE ((fYuv_ = fopen (filename.c_str(), "rb")) != NULL);
-#else
-    ASSERT_TRUE ((fYuv_ = fopen (sFileName, "rb")) != NULL);
-#endif
-    iWidth_ = kiWidth;
-    iHeight_ = kiHeight;
-  }
-  void TearDown() {
-    EncodeDecodeTestBase::TearDown();
-    if (fYuv_ != NULL) {
-      fclose (fYuv_);
-      fYuv_ = NULL;
-    }
-  }
-
-  bool prepareEncDecParam (const EncodeDecodeFileParamBase p) {
-    if (!EncodeDecodeTestBase::prepareEncDecParam (p))
-      return false;
-    unsigned char* pTmpPtr = BsInfo_.pDstBuff; //store for restore
-    memset (&BsInfo_, 0, sizeof (SParserBsInfo));
-    BsInfo_.pDstBuff = pTmpPtr;
-    return true;
-  }
-
-  void MockInputData (uint8_t* pData, int32_t iSize) {
-    int32_t iCurr = 0;
-    while (iCurr < iSize) {
-      * (pData + iCurr) = (* (pData + iCurr) + (rand() % 20) + 256) & 0x00ff;
-      iCurr++;
-    }
-  }
-
-  void EncodeOneFrame (bool bMock) {
-    int iFrameSize = iWidth_ * iHeight_ * 3 / 2;
-    int iSize = (int) fread (buf_.data(), sizeof (char), iFrameSize, fYuv_);
-    if (feof (fYuv_) || iSize != iFrameSize) {
-      rewind (fYuv_);
-      iSize = (int) fread (buf_.data(), sizeof (char), iFrameSize, fYuv_);
-      ASSERT_TRUE (iSize == iFrameSize);
-    }
-    if (bMock) {
-      MockInputData (buf_.data(), iWidth_ * iHeight_);
-    }
-    int rv = encoder_->EncodeFrame (&EncPic, &info);
-    ASSERT_TRUE (rv == cmResultSuccess || rv == cmUnknownReason);
-  }
-
-  void prepareParam (int iLayerNum, int iSliceNum, int width, int height, float framerate, SEncParamExt* pParam) {
-    memset (pParam, 0, sizeof (SEncParamExt));
-    EncodeDecodeTestBase::prepareParam (iLayerNum, iSliceNum,  width, height, framerate, pParam);
-  }
-
- protected:
-  SParserBsInfo BsInfo_;
-  FILE* fYuv_;
-  int iWidth_;
-  int iHeight_;
-  SHA1Context ctx_;
-};
-
-//#define DEBUG_FILE_SAVE_PARSEONLY_GENERAL
-TEST_F (DecodeParseAPI, ParseOnly_General) {
-  EncodeDecodeFileParamBase p;
-  p.width = iWidth_;
-  p.height = iHeight_;
-  p.frameRate = kiFrameRate;
-  p.numframes = kiFrameNum;
-  prepareParam (kiTotalLayer, kiSliceNum, p.width, p.height, p.frameRate, &param_);
-  param_.iSpatialLayerNum = kiTotalLayer;
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == 0);
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  uint32_t uiTargetLayerId = rand() % kiTotalLayer; //run only once
-#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
-  FILE* fDec = fopen ("output.264", "wb");
-  FILE* fEnc = fopen ("enc.264", "wb");
-  FILE* fExtract = fopen ("extract.264", "wb");
-#endif
-  if (uiTargetLayerId < kiTotalLayer) { //should always be true
-    //Start for enc
-    int iLen = 0;
-    ASSERT_TRUE (prepareEncDecParam (p));
-    int iFrame = 0;
-
-    while (iFrame < p.numframes) {
-      //encode
-      EncodeOneFrame (0);
-      //extract target layer data
-      encToDecData (info, iLen);
-#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
-      fwrite (info.sLayerInfo[0].pBsBuf, iLen, 1, fEnc);
-#endif
-      ExtractDidNal (&info, iLen, &m_SLostSim, uiTargetLayerId);
-#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
-      fwrite (info.sLayerInfo[0].pBsBuf, iLen, 1, fExtract);
-#endif
-      //parseonly
-      //BsInfo_.pDstBuff = new unsigned char [1000000];
-      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf, iLen, &BsInfo_);
-      EXPECT_TRUE (rv == 0);
-      EXPECT_TRUE (BsInfo_.iNalNum == 0);
-      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
-      EXPECT_TRUE (rv == 0);
-      EXPECT_TRUE (BsInfo_.iNalNum != 0);
-      //get final output bs
-      iLen = 0;
-      int i = 0;
-      while (i < BsInfo_.iNalNum) {
-        iLen += BsInfo_.pNalLenInByte[i];
-        i++;
-      }
-#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
-      fwrite (BsInfo_.pDstBuff, iLen, 1, fDec);
-#endif
-      SHA1Input (&ctx_, BsInfo_.pDstBuff, iLen);
-      iFrame++;
-    }
-    //calculate final SHA1 value
-    unsigned char digest[SHA_DIGEST_LENGTH];
-    SHA1Result (&ctx_, digest);
-    if (!HasFatalFailure()) {
-      CompareHashAnyOf (digest, pHashStr[uiTargetLayerId], sizeof * pHashStr / sizeof** pHashStr);
-    }
-  } //while
-#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
-  fclose (fEnc);
-  fclose (fExtract);
-  fclose (fDec);
-#endif
-}
-
-//This case is for one layer only, for incomplete frame input
-//First slice is loss for random one picture with 2 slices per pic
-TEST_F (DecodeParseAPI, ParseOnly_SpecSliceLoss) {
-  int32_t iLayerNum = 1;
-  int32_t iSliceNum = 2;
-  EncodeDecodeFileParamBase p;
-  p.width = iWidth_;
-  p.height = iHeight_;
-  p.frameRate = kiFrameRate;
-  p.numframes = 5;
-  prepareParam (iLayerNum, iSliceNum, p.width, p.height, p.frameRate, &param_);
-  param_.iSpatialLayerNum = iLayerNum;
-  encoder_->Uninitialize();
-  int rv = encoder_->InitializeExt (&param_);
-  ASSERT_TRUE (rv == 0);
-  int32_t iTraceLevel = WELS_LOG_QUIET;
-  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-
-  int32_t iMissedPicNum = rand() % (p.numframes - 1) + 1; //IDR no loss
-  //Start for enc
-  int iLen = 0;
-  uint32_t uiGet;
-  ASSERT_TRUE (prepareEncDecParam (p));
-  int iFrame = 0;
-
-  while (iFrame < p.numframes) {
-    //encode
-    EncodeOneFrame (0);
-    //parseonly
-    if (iFrame == iMissedPicNum) { //make current frame partly missing
-      //Frame: P, first slice loss
-      int32_t iTotalSliceSize = 0;
-      encToDecSliceData (0, 0, info, iTotalSliceSize); //slice 1 lost
-      encToDecSliceData (0, 1, info, iLen); //slice 2
-      decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
-      EXPECT_EQ (uiGet, (uint32_t) ERROR_CON_DISABLE);
-      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf + iTotalSliceSize, iLen, &BsInfo_);
-      EXPECT_TRUE (rv == 0);
-      EXPECT_TRUE (BsInfo_.iNalNum == 0);
-      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
-      EXPECT_TRUE (rv != 0);
-    } else { //normal frame, complete
-      encToDecData (info, iLen);
-      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf, iLen, &BsInfo_);
-      EXPECT_TRUE (rv == 0); //parse correct
-      EXPECT_TRUE (BsInfo_.iNalNum == 0);
-      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
-      if (iFrame < iMissedPicNum) { //correct frames, all OK with output
-        EXPECT_TRUE (rv == 0);
-        EXPECT_TRUE (BsInfo_.iNalNum != 0);
-      } else { //(iFrame > iMissedPicNum), should output nothing as error
-        EXPECT_TRUE (rv != 0);
-        EXPECT_TRUE (BsInfo_.iNalNum == 0);
-      }
-    }
-    iFrame++;
-  } //while
-}
-
-TEST_F (DecodeParseAPI, ParseOnly_SpecStatistics) {
-  //set params
-  int32_t iLayerNum = 1;
-  int32_t iSliceNum = 1;
-  EncodeDecodeFileParamBase p;
-  const int iLoopNum = 10;
-  p.frameRate = kiFrameRate;
-  p.numframes = 2;  //encode 2 frames in each test
-  p.width = iWidth_ = 16;
-  p.height = iHeight_ = 16; //default start width/height = 16, will be modified each time
-  int iTotalFrmCnt = 0;
-  for (int i = 0; i < iLoopNum; ++i) {
-    prepareParam (iLayerNum, iSliceNum, p.width, p.height, p.frameRate, &param_);
-    param_.iSpatialLayerNum = iLayerNum;
-    param_.sSpatialLayers[0].iDLayerQp = 40; //to revent size too limited to encoding fail
-    encoder_->Uninitialize();
-    int rv = encoder_->InitializeExt (&param_);
-    ASSERT_TRUE (rv == 0);
-    int32_t iTraceLevel = WELS_LOG_QUIET;
-    rv = encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    ASSERT_TRUE (rv == 0);
-    rv = decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    ASSERT_TRUE (rv == 0);
-    //Start for enc
-    int iLen = 0;
-    ASSERT_TRUE (prepareEncDecParam (p));
-    int iFrame = 0;
-    while (iFrame < p.numframes) {
-      EncodeOneFrame (0);
-      encToDecData (info, iLen);
-      iFrame++;
-      iTotalFrmCnt++;
-      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf, iLen, &BsInfo_);
-      ASSERT_TRUE (rv == 0);
-      ASSERT_TRUE (BsInfo_.iNalNum == 0);
-      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
-      ASSERT_TRUE (rv == 0);
-      ASSERT_TRUE (BsInfo_.iNalNum != 0);
-      SDecoderStatistics sDecStat;
-      rv = decoder_->GetOption (DECODER_OPTION_GET_STATISTICS, &sDecStat);
-      ASSERT_TRUE (rv == 0);
-      uint32_t uiProfile, uiLevel;
-      rv = decoder_->GetOption (DECODER_OPTION_PROFILE, &uiProfile);
-      ASSERT_TRUE (rv == 0);
-      rv = decoder_->GetOption (DECODER_OPTION_LEVEL, &uiLevel);
-      ASSERT_TRUE (rv == 0);
-
-      ASSERT_EQ (sDecStat.uiWidth, (unsigned int) p.width);
-      ASSERT_EQ (sDecStat.uiHeight, (unsigned int) p.height);
-      ASSERT_EQ (sDecStat.uiResolutionChangeTimes, (unsigned int) (i + 1));
-      EXPECT_EQ (sDecStat.iCurrentActiveSpsId, 0);
-      EXPECT_EQ (sDecStat.iCurrentActivePpsId, 0);
-      ASSERT_EQ (sDecStat.uiDecodedFrameCount, (unsigned int) iTotalFrmCnt);
-      ASSERT_EQ (sDecStat.uiProfile, uiProfile);
-      ASSERT_EQ (sDecStat.uiLevel, uiLevel);
-      EXPECT_TRUE (sDecStat.fActualAverageFrameSpeedInMs != 0.);
-      EXPECT_TRUE (sDecStat.fAverageFrameSpeedInMs != 0.);
-      EXPECT_TRUE (sDecStat.iAvgLumaQp != 0);
-      EXPECT_EQ (sDecStat.uiIDRCorrectNum, (unsigned int) (i + 1));
-    }
-    //set next width & height
-    p.width += 16;
-    p.height += 16;
-    if ((unsigned int) p.width > kiWidth) //exceeds max frame size
-      p.width = 16;
-    if ((unsigned int) p.height > kiHeight)
-      p.height = 16;
-    iWidth_ = p.width;
-    iHeight_ = p.height;
-  }
-}
-
-
-//Test parseonly crash cases
-class DecodeParseCrashAPI : public DecodeParseAPI {
- public:
-  DecodeParseCrashAPI() {
-  }
-  void SetUp() {
-    DecodeParseAPI::SetUp();
-    iWidth_ = 1280;
-    iHeight_ = 720;
-
-    ucBuf_ = NULL;
-    ucBuf_ = new unsigned char[1000000];
-    ASSERT_TRUE (ucBuf_ != NULL);
-
-  }
-  void TearDown() {
-    DecodeParseAPI::TearDown();
-    if (NULL != ucBuf_) {
-      delete[] ucBuf_;
-      ucBuf_ = NULL;
-    }
-    ASSERT_TRUE (ucBuf_ == NULL);
-  }
-
- protected:
-  unsigned char* ucBuf_;
-};
-
-//#define DEBUG_FILE_SAVE_PARSE_CRA1
-TEST_F (DecodeParseCrashAPI, ParseOnlyCrash_General) {
-  if (fYuv_)
-    fclose (fYuv_);
-  const char* sFileName = "res/Cisco_Absolute_Power_1280x720_30fps.yuv";
-#if defined(ANDROID_NDK)
-  std::string filename = std::string ("/sdcard/") + sFileName;
-  ASSERT_TRUE ((fYuv_ = fopen (filename.c_str(), "rb")) != NULL);
-#else
-  ASSERT_TRUE ((fYuv_ = fopen (sFileName, "rb")) != NULL);
-#endif
-  uint32_t uiGet;
-  encoder_->Uninitialize();
-  //do tests until crash
-  unsigned int uiLoopRound = 0;
-  unsigned char* pucBuf = ucBuf_;
-  int iDecAuSize;
-#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
-  //open file to save tested BS
-  FILE* fDataFile = fopen ("test_parseonly_crash.264", "wb");
-  FILE* fLenFile = fopen ("test_parseonly_crash_len.log", "w");
-  int iFileSize = 0;
-#endif
-
-  do {
-#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
-    int iTotalFrameNum = (rand() % 1200) + 1;
-#else
-    int iTotalFrameNum = (rand() % 100) + 1;
-#endif
-    EncodeDecodeParamBase p = kParamArray[8]; //720p by default
-
-    //Initialize Encoder
-    prepareParam (1, 4, p.width, p.height, p.frameRate, &param_);
-    param_.iRCMode = RC_TIMESTAMP_MODE;
-    param_.iTargetBitrate = p.iTarBitrate;
-    param_.uiIntraPeriod = 0;
-    param_.eSpsPpsIdStrategy = CONSTANT_ID;
-    param_.bEnableBackgroundDetection = true;
-    param_.bEnableSceneChangeDetect = (rand() % 3) ? true : false;
-    param_.bPrefixNalAddingCtrl = 0;// (rand() % 2) ? true : false;
-    param_.iEntropyCodingModeFlag = 0;
-    param_.bEnableFrameSkip = true;
-    param_.iMultipleThreadIdc = 0;
-    param_.sSpatialLayers[0].iSpatialBitrate = p.iTarBitrate;
-    param_.sSpatialLayers[0].iMaxSpatialBitrate = p.iTarBitrate << 1;
-    param_.sSpatialLayers[0].sSliceArgument.uiSliceMode =
-      SM_FIXEDSLCNUM_SLICE; // (rand() % 2) ? SM_SIZELIMITED_SLICE : SM_SINGLE_SLICE;
-    if (param_.sSpatialLayers[0].sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE) {
-      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 1400;
-      param_.uiMaxNalSize = 1400;
-    } else {
-      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 0;
-      param_.uiMaxNalSize = 0;
-    }
-
-    int rv = encoder_->InitializeExt (&param_);
-    ASSERT_TRUE (rv == cmResultSuccess);
-    decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
-    EXPECT_EQ (uiGet, (uint32_t)ERROR_CON_DISABLE); //default value should be ERROR_CON_SLICE_COPY
-    int32_t iTraceLevel = WELS_LOG_QUIET;
-    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
-
-    //Start for enc/dec
-    int iIdx = 0;
-    unsigned char* pData[3] = { NULL };
-
-    EncodeDecodeFileParamBase pInput; //to conform with old functions
-    pInput.width = p.width;
-    pInput.height = p.height;
-    pInput.frameRate = p.frameRate;
-    prepareEncDecParam (pInput);
-    while (iIdx++ < iTotalFrameNum) { // loop in frame
-      EncodeOneFrame (1);
-#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
-      //reset file if file size large
-      if ((info.eFrameType == videoFrameTypeIDR) && (iFileSize >= (1 << 25))) {
-        fclose (fDataFile);
-        fclose (fLenFile);
-        fDataFile = fopen ("test_parseonly_crash.264", "wb");
-        fLenFile = fopen ("test_parseonly_crash_len.log", "w");
-        iFileSize = 0;
-        decoder_->Uninitialize();
-
-        SDecodingParam decParam;
-        memset (&decParam, 0, sizeof (SDecodingParam));
-        decParam.uiTargetDqLayer = UCHAR_MAX;
-        decParam.eEcActiveIdc = ERROR_CON_DISABLE;
-        decParam.bParseOnly = true;
-        decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
-
-        rv = decoder_->Initialize (&decParam);
-        ASSERT_EQ (0, rv);
-      }
-#endif
-      if (info.eFrameType == videoFrameTypeSkip)
-        continue;
-      //deal with packets
-      unsigned char* pBsBuf;
-      iDecAuSize = 0;
-      pucBuf = ucBuf_; //init buf start pos for decoder usage
-      for (int iLayerNum = 0; iLayerNum < info.iLayerNum; iLayerNum++) {
-        SLayerBSInfo* pLayerBsInfo = &info.sLayerInfo[iLayerNum];
-        pBsBuf = info.sLayerInfo[iLayerNum].pBsBuf;
-        int iTotalNalCnt = pLayerBsInfo->iNalCount;
-        for (int iNalCnt = 0; iNalCnt < iTotalNalCnt; iNalCnt++) {  //loop in NAL
-          int iPacketSize = pLayerBsInfo->pNalLengthInByte[iNalCnt];
-          //packet loss
-          int iLossRateRange = (uiLoopRound % 20) + 1; //1-100
-          int iLossRate = (rand() % iLossRateRange);
-          bool bPacketLost = (rand() % 101) > (100 -
-                                               iLossRate);   // [0, (100-iLossRate)] indicates NO LOSS, (100-iLossRate, 100] indicates LOSS
-          if (!bPacketLost) { //no loss
-            memcpy (pucBuf, pBsBuf, iPacketSize);
-            pucBuf += iPacketSize;
-            iDecAuSize += iPacketSize;
-          }
-          //update bs info
-          pBsBuf += iPacketSize;
-        } //nal
-      } //layer
-
-#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
-      //save to file
-      if (iDecAuSize != 0) {
-        fwrite (ucBuf_, 1, iDecAuSize, fDataFile);
-        fflush (fDataFile);
-        iFileSize += iDecAuSize;
-      }
-
-      //save to len file
-      unsigned long ulTmp[4];
-      ulTmp[0] = ulTmp[1] = ulTmp[2] = iIdx;
-      ulTmp[3] = iDecAuSize;
-      fwrite (ulTmp, sizeof (unsigned long), 4, fLenFile); // index, timeStamp, data size
-      fflush (fLenFile);
-#endif
-
-      //decode
-      pData[0] = pData[1] = pData[2] = 0;
-      memset (&BsInfo_, 0, sizeof (SParserBsInfo));
-
-      rv = decoder_->DecodeParser (ucBuf_, iDecAuSize, &BsInfo_);
-      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_); //reconstruction
-      //guarantee decoder EC status
-      decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
-      EXPECT_EQ (uiGet, (uint32_t)ERROR_CON_DISABLE);
-    } //frame
-    uiLoopRound++;
-    if (uiLoopRound >= (1 << 30))
-      uiLoopRound = 0;
-#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
-    if (uiLoopRound % 10 == 0)
-      printf ("run %d times.\n", uiLoopRound);
-  } while (1);
-  fclose (fDataFile);
-  fclose (fLenFile);
-#else
-  }
-  while (0);
-#endif
-
-}
-
+#include <gtest/gtest.h>
+#include "codec_def.h"
+#include "utils/BufferedData.h"
+#include "utils/FileInputStream.h"
+#include "BaseDecoderTest.h"
+#include "BaseEncoderTest.h"
+#include "wels_common_defs.h"
+#include "utils/HashFunctions.h"
+#include <string>
+#include <vector>
+#include "encode_decode_api_test.h"
+using namespace WelsCommon;
+
+static void TestOutPutTrace (void* ctx, int level, const char* string) { // `string` (the message text) is not inspected
+  STraceUnit* pTraceUnit = static_cast<STraceUnit*> (ctx); // ctx is handed in as an untyped STraceUnit pointer
+  EXPECT_LE (level, pTraceUnit->iTarLevel); // a reported level must not exceed the unit's target level
+}
+
+// Checks DECODER_OPTION_VCL_NAL around every frame: FEEDBACK_UNKNOWN_NAL is expected right after the
+// bitstream is fed, FEEDBACK_VCL_NAL after the follow-up DecodeFrame2 (NULL, 0, ...) reconstruction call.
+TEST_P (EncodeDecodeTestAPI, DecoderVclNal) {
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum, p.width, p.height, p.frameRate, &param_);
+  encoder_->Uninitialize();
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+
+  int iIdx = 0;
+  while (iIdx <= p.numframes) {
+    EncodeOneFrame (0);
+    //decoding after each encoding frame
+    int vclNal = -1, len = 0; //vclNal starts defined and is only compared once GetOption is confirmed to succeed
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    ASSERT_TRUE (rv == cmResultSuccess);
+    ASSERT_TRUE (decoder_->GetOption (DECODER_OPTION_VCL_NAL, &vclNal) == cmResultSuccess);
+    EXPECT_EQ (vclNal, FEEDBACK_UNKNOWN_NAL); //no reconstruction, unknown return
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    ASSERT_TRUE (rv == cmResultSuccess);
+    ASSERT_TRUE (decoder_->GetOption (DECODER_OPTION_VCL_NAL, &vclNal) == cmResultSuccess);
+    EXPECT_EQ (vclNal, FEEDBACK_VCL_NAL);
+    iIdx++;
+  } //while
+  //ignore last frame
+}
+
+// Checks DECODER_OPTION_FRAME_NUM around every frame: -1 is expected right after the bitstream is fed,
+// and the test's own running frame counter after the DecodeFrame2 (NULL, 0, ...) reconstruction call.
+TEST_P (EncodeDecodeTestAPI, GetOptionFramenum) {
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  encoder_->Uninitialize();
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+
+  int32_t iEncFrameNum = -1;
+  int32_t iDecFrameNum = -2; //differs from -1 and from every counter value, so a skipped write cannot pass
+  int iIdx = 0;
+  while (iIdx <= p.numframes) {
+    EncodeOneFrame (0);
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    ASSERT_TRUE (rv == cmResultSuccess);
+    ASSERT_TRUE (decoder_->GetOption (DECODER_OPTION_FRAME_NUM, &iDecFrameNum) == cmResultSuccess);
+    EXPECT_EQ (iDecFrameNum, -1);
+    iEncFrameNum++;
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    ASSERT_TRUE (rv == cmResultSuccess);
+    ASSERT_TRUE (decoder_->GetOption (DECODER_OPTION_FRAME_NUM, &iDecFrameNum) == cmResultSuccess);
+    EXPECT_EQ (iEncFrameNum, iDecFrameNum);
+    iIdx++;
+  } //while
+  //ignore last frame
+}
+
+// Checks DECODER_OPTION_IDR_PIC_ID against a running count of the IDR frames the encoder emitted, while
+// the IDR interval and the SPS/PPS id strategy are re-randomized through SetOption before every frame.
+TEST_P (EncodeDecodeTestAPI, GetOptionIDR) {
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  encoder_->Uninitialize();
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+
+  int32_t iEncCurIdrPicId = 0;
+  int32_t iDecCurIdrPicId = -1; //the expected count never goes below 0, so a skipped write cannot pass
+  int32_t iIDRPeriod = 1;
+  int32_t iSpsPpsIdAddition = 0;
+  int iIdx = 0;
+  while (iIdx <= p.numframes) {
+    //the current strategy supports more than 2 modes, but the switch between the modes>2 is not allowed
+    iSpsPpsIdAddition = rand() % 2;
+    iIDRPeriod = (rand() % 150) + 1;
+    encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
+    encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
+
+    EncodeOneFrame (0);
+
+    if (info.eFrameType == videoFrameTypeIDR) {
+      iEncCurIdrPicId = iEncCurIdrPicId + 1;
+    }
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    ASSERT_TRUE (rv == cmResultSuccess);
+    ASSERT_TRUE (decoder_->GetOption (DECODER_OPTION_IDR_PIC_ID, &iDecCurIdrPicId) == cmResultSuccess);
+    EXPECT_EQ (iDecCurIdrPicId, iEncCurIdrPicId);
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    ASSERT_TRUE (rv == cmResultSuccess);
+    ASSERT_TRUE (decoder_->GetOption (DECODER_OPTION_IDR_PIC_ID, &iDecCurIdrPicId) == cmResultSuccess);
+    EXPECT_EQ (iDecCurIdrPicId, iEncCurIdrPicId);
+    iIdx++;
+  } //while
+  //ignore last frame
+}
+
+// Checks that the value written to dstBufInfo_.uiInBsTimeStamp before decoding is reported back in
+// uiOutYuvTimeStamp whenever the reconstruction call signals an output picture (iBufferStatus == 1).
+TEST_P (EncodeDecodeTestAPI, InOutTimeStamp) {
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  encoder_->Uninitialize();
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  int32_t iSpsPpsIdAddition = 1;
+  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
+  int32_t iIDRPeriod = 60;
+  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
+  SLTRConfig sLtrConfigVal;
+  sLtrConfigVal.bEnableLongTermReference = 1;
+  sLtrConfigVal.iLTRRefNum = 1;
+  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
+  int32_t iLtrPeriod = 2;
+  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
+  int iIdx = 0;
+  unsigned long long uiEncTimeStamp = 100;
+  while (iIdx <= p.numframes) {
+    EncodeOneFrame (1);
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    uint32_t uiEcIdc = ERROR_CON_SLICE_COPY_CROSS_IDR_FREEZE_RES_CHANGE;
+    decoder_->SetOption (DECODER_OPTION_ERROR_CON_IDC, &uiEcIdc);
+    dstBufInfo_.uiInBsTimeStamp = uiEncTimeStamp;
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo)); //the struct is cleared, so the timestamp is set again below
+    dstBufInfo_.uiInBsTimeStamp = uiEncTimeStamp;
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    if (dstBufInfo_.iBufferStatus == 1) {
+      EXPECT_EQ (uiEncTimeStamp, dstBufInfo_.uiOutYuvTimeStamp);
+    }
+    iIdx++;
+    uiEncTimeStamp++;
+  }
+}
+
+TEST_P (EncodeDecodeTestAPI, GetOptionIsRefPic) { // Checks DECODER_OPTION_IS_REF_PIC: -1 before decoding and after the data-carrying DecodeFrame2 call, >= 0 after the second call when iBufferStatus == 1.
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum, p.width, p.height, p.frameRate, &param_);
+  encoder_->Uninitialize(); // drop the current encoder state before initializing with param_
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  int iIdx = 0;
+  int iSkipedBytes; // never assigned or read; only referenced by the (void) cast at the end of the test
+  int iIsRefPic;
+  decoder_->GetOption (DECODER_OPTION_IS_REF_PIC, &iIsRefPic);
+  ASSERT_EQ (iIsRefPic, -1); // nothing has been decoded yet
+
+  while (iIdx <= p.numframes) { // note: runs numframes + 1 iterations
+    EncodeOneFrame (1);
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_); // rv is overwritten below without being checked
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    decoder_->GetOption (DECODER_OPTION_IS_REF_PIC, &iIsRefPic);
+    ASSERT_EQ (iIsRefPic, -1); // still -1 after the first DecodeFrame2 call
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    if (dstBufInfo_.iBufferStatus == 1) { // presumably 1 means a decoded picture is available -- confirm against SBufferInfo
+      decoder_->GetOption (DECODER_OPTION_IS_REF_PIC, &iIsRefPic);
+      ASSERT_TRUE (iIsRefPic >= 0); // any non-negative value is accepted here
+    }
+    iIdx++;
+  }
+  (void)iSkipedBytes; // references the otherwise unused variable
+}
+
+TEST_P (EncodeDecodeTestAPI, GetOptionTid_AVC_NOPREFIX) { // With prefix NAL adding disabled, DECODER_OPTION_TEMPORAL_ID must read either -1 or 0 after each decode call, under simulated NAL loss.
+  SLTRMarkingFeedback m_LTR_Marking_Feedback;
+  SLTRRecoverRequest m_LTR_Recover_Request;
+  m_LTR_Recover_Request.uiIDRPicId = 0;
+  m_LTR_Recover_Request.iLayerId = 0;
+  m_LTR_Marking_Feedback.iLayerId = 0;
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  param_.bPrefixNalAddingCtrl = false;
+  param_.iTemporalLayerNum = (rand() % 4) + 1; // random value in [1, 4]
+  encoder_->Uninitialize(); // drop the current encoder state before initializing with param_
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  int32_t iSpsPpsIdAddition = 1;
+  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
+  int32_t iIDRPeriod = 60;
+  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
+  SLTRConfig sLtrConfigVal;
+  sLtrConfigVal.bEnableLongTermReference = 1;
+  sLtrConfigVal.iLTRRefNum = 1;
+  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
+  int32_t iLtrPeriod = 2;
+  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
+  int iIdx = 0;
+  int iLossIdx = 0;
+  bool bVCLLoss = false;
+  while (iIdx <= p.numframes) { // note: runs numframes + 1 iterations
+    EncodeOneFrame (1);
+    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) { // state left over from the previous iteration's feedback calls
+      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
+    }
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    SimulateNALLoss (info.sLayerInfo[0].pBsBuf, len, &m_SLostSim, p.pLossSequence, p.bLostPara, iLossIdx, bVCLLoss); // helper defined elsewhere; presumably drops NALs per p.pLossSequence and updates len -- confirm
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    int iTid = -1;
+    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
+    if (iTid != -1) { // -1 is tolerated; any other value must be 0
+      ASSERT_EQ (iTid, 0);
+    }
+    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
+    std::vector<SLostSim>::iterator iter = m_SLostSim.begin();
+    bool bHasVCL = false; // set when a non-lost VCL NAL is found; the result is not used by any check below
+    for (unsigned int k = 0; k < m_SLostSim.size(); k++) {
+      if (IS_VCL_NAL (iter->eNalType, 0) && iter->isLost == false) {
+        bHasVCL = true;
+        break;
+      }
+      iter++;
+    }
+    (void) bHasVCL; // references the otherwise unused result
+    if (iTid != -1) { // same rule after the second decode call
+      ASSERT_EQ (iTid, 0);
+    }
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
+    iIdx++;
+  }
+}
+
+TEST_P (EncodeDecodeTestAPI, GetOptionTid_AVC_WITH_PREFIX_NOLOSS) { // With prefix NALs enabled and one spatial layer, expects temporal id -1 after the first DecodeFrame2 call and info.sLayerInfo[0].uiTemporalId after the second.
+  SLTRMarkingFeedback m_LTR_Marking_Feedback;
+  SLTRRecoverRequest m_LTR_Recover_Request;
+  m_LTR_Recover_Request.uiIDRPicId = 0;
+  m_LTR_Recover_Request.iLayerId = 0;
+  m_LTR_Marking_Feedback.iLayerId = 0;
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  param_.bPrefixNalAddingCtrl = true;
+  param_.iTemporalLayerNum = (rand() % 4) + 1; // random value in [1, 4]
+  param_.iSpatialLayerNum = 1;
+  encoder_->Uninitialize(); // drop the current encoder state before initializing with param_
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  int32_t iSpsPpsIdAddition = 1;
+  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
+  int32_t iIDRPeriod = 60;
+  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
+  SLTRConfig sLtrConfigVal;
+  sLtrConfigVal.bEnableLongTermReference = 1;
+  sLtrConfigVal.iLTRRefNum = 1;
+  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
+  int32_t iLtrPeriod = 2;
+  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
+  int iIdx = 0;
+  while (iIdx <= p.numframes) { // note: runs numframes + 1 iterations
+    EncodeOneFrame (1);
+    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) { // state left over from the previous iteration's feedback calls
+      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
+    }
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    ExtractDidNal (&info, len, &m_SLostSim, 0); // helper defined elsewhere; presumably keeps only the NALs of layer 0 and updates len -- confirm
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    int iTid = -1;
+    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
+    ASSERT_EQ (iTid, -1); // expected to stay -1 until the second DecodeFrame2 call
+    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
+    ASSERT_EQ (iTid, info.sLayerInfo[0].uiTemporalId); // must match the temporal id reported by the encoder for this frame
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
+    iIdx++;
+  }
+}
+
+TEST_P (EncodeDecodeTestAPI, GetOptionTid_SVC_L1_NOLOSS) { // Two spatial layers, layer 1 extracted: expects temporal id -1 after the first DecodeFrame2 call and info.sLayerInfo[0].uiTemporalId after the second.
+  SLTRMarkingFeedback m_LTR_Marking_Feedback;
+  SLTRRecoverRequest m_LTR_Recover_Request;
+  m_LTR_Recover_Request.uiIDRPicId = 0;
+  m_LTR_Recover_Request.iLayerId = 0;
+  m_LTR_Marking_Feedback.iLayerId = 0;
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (2, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  param_.iTemporalLayerNum = (rand() % 4) + 1; // random value in [1, 4]
+  param_.iSpatialLayerNum = 2;
+  encoder_->Uninitialize(); // drop the current encoder state before initializing with param_
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == cmResultSuccess);
+  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  int32_t iSpsPpsIdAddition = 1;
+  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
+  int32_t iIDRPeriod = 60;
+  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
+  SLTRConfig sLtrConfigVal;
+  sLtrConfigVal.bEnableLongTermReference = 1;
+  sLtrConfigVal.iLTRRefNum = 1;
+  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
+  int32_t iLtrPeriod = 2;
+  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
+  int iIdx = 0;
+  while (iIdx <= p.numframes) { // note: runs numframes + 1 iterations
+    EncodeOneFrame (1);
+    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) { // state left over from the previous iteration's feedback calls
+      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
+    }
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    ExtractDidNal (&info, len, &m_SLostSim, 1); // helper defined elsewhere; presumably keeps only the NALs needed for layer 1 and updates len -- confirm
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    int iTid = -1;
+    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
+    ASSERT_EQ (iTid, -1); // expected to stay -1 until the second DecodeFrame2 call
+    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    decoder_->GetOption (DECODER_OPTION_TEMPORAL_ID, &iTid);
+    ASSERT_EQ (iTid, info.sLayerInfo[0].uiTemporalId); // must match the temporal id reported by the encoder for this frame
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
+    iIdx++;
+  }
+}
+
+
+
+TEST_P (EncodeDecodeTestAPI, SetOption_Trace) { // Installs TestOutPutTrace as trace callback on encoder and decoder, then encodes/decodes under simulated NAL loss while picking a random trace level per frame.
+  SLTRMarkingFeedback m_LTR_Marking_Feedback;
+  SLTRRecoverRequest m_LTR_Recover_Request;
+  m_LTR_Recover_Request.uiIDRPicId = 0;
+  m_LTR_Recover_Request.iLayerId = 0;
+  m_LTR_Marking_Feedback.iLayerId = 0;
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  param_.iSpatialLayerNum = 1;
+
+  int rv = encoder_->InitializeExt (&param_); // NOTE(review): no encoder_->Uninitialize() beforehand, unlike the neighbouring tests -- confirm this is intended
+  ASSERT_TRUE (rv == cmResultSuccess);
+  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  pFunc = TestOutPutTrace;
+  pTraceInfo = &sTrace;
+  sTrace.iTarLevel = iTraceLevel; // keep the callback context in step with the level set on the codecs
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK, &pFunc);
+  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
+  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK, &pFunc);
+  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
+
+
+  int32_t iSpsPpsIdAddition = 1;
+  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
+  int32_t iIDRPeriod = 60;
+  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
+  SLTRConfig sLtrConfigVal;
+  sLtrConfigVal.bEnableLongTermReference = 1;
+  sLtrConfigVal.iLTRRefNum = 1;
+  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
+  int32_t iLtrPeriod = 2;
+  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
+  int iIdx = 0;
+  int iLossIdx = 0;
+  bool bVCLLoss = false;
+  while (iIdx <= p.numframes) { // note: runs numframes + 1 iterations
+    iTraceLevel = rand() % 33; // random level in [0, 32]
+    sTrace.iTarLevel = iTraceLevel; // the callback context carries the level chosen for this frame
+    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    EncodeOneFrame (1);
+    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) { // state left over from the previous iteration's feedback calls
+      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
+    }
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    ExtractDidNal (&info, len, &m_SLostSim, 0); // helper defined elsewhere; presumably keeps only the NALs of layer 0 and updates len -- confirm
+    SimulateNALLoss (info.sLayerInfo[0].pBsBuf, len, &m_SLostSim, p.pLossSequence, p.bLostPara, iLossIdx, bVCLLoss); // helper defined elsewhere; presumably drops NALs per p.pLossSequence -- confirm
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
+    iIdx++;
+  }
+}
+
+TEST_P (EncodeDecodeTestAPI, SetOption_Trace_NULL) { // Same flow as SetOption_Trace, but with a NULL trace callback and NULL callback context installed on both codecs.
+  SLTRMarkingFeedback m_LTR_Marking_Feedback;
+  SLTRRecoverRequest m_LTR_Recover_Request;
+  m_LTR_Recover_Request.uiIDRPicId = 0;
+  m_LTR_Recover_Request.iLayerId = 0;
+  m_LTR_Marking_Feedback.iLayerId = 0;
+  EncodeDecodeFileParamBase p = GetParam();
+  prepareParamDefault (1, p.slicenum,  p.width, p.height, p.frameRate, &param_);
+  param_.iSpatialLayerNum = 1;
+  int rv = encoder_->InitializeExt (&param_); // NOTE(review): no encoder_->Uninitialize() beforehand, unlike the neighbouring tests -- confirm this is intended
+  ASSERT_TRUE (rv == cmResultSuccess);
+  m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+
+  ASSERT_TRUE (InitialEncDec (p.width, p.height));
+
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  pFunc = NULL;
+  pTraceInfo = NULL;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK, &pFunc);
+  encoder_->SetOption (ENCODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
+  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK, &pFunc);
+  decoder_->SetOption (DECODER_OPTION_TRACE_CALLBACK_CONTEXT, &pTraceInfo);
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+
+  int32_t iSpsPpsIdAddition = 1;
+  encoder_->SetOption (ENCODER_OPTION_SPS_PPS_ID_STRATEGY, &iSpsPpsIdAddition);
+  int32_t iIDRPeriod = 60;
+  encoder_->SetOption (ENCODER_OPTION_IDR_INTERVAL, &iIDRPeriod);
+  SLTRConfig sLtrConfigVal;
+  sLtrConfigVal.bEnableLongTermReference = 1;
+  sLtrConfigVal.iLTRRefNum = 1;
+  encoder_->SetOption (ENCODER_OPTION_LTR, &sLtrConfigVal);
+  int32_t iLtrPeriod = 2;
+  encoder_->SetOption (ENCODER_LTR_MARKING_PERIOD, &iLtrPeriod);
+  int iIdx = 0;
+  int iLossIdx = 0;
+  bool bVCLLoss = false;
+  while (iIdx <= p.numframes) { // note: runs numframes + 1 iterations
+    iTraceLevel = rand() % 33; // random level in [0, 32]
+    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    EncodeOneFrame (1);
+    if (m_LTR_Recover_Request.uiFeedbackType == IDR_RECOVERY_REQUEST) { // state left over from the previous iteration's feedback calls
+      ASSERT_TRUE (info.eFrameType == videoFrameTypeIDR);
+    }
+    //decoding after each encoding frame
+    int len = 0;
+    encToDecData (info, len);
+    unsigned char* pData[3] = { NULL };
+    memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+    ExtractDidNal (&info, len, &m_SLostSim, 0); // helper defined elsewhere; presumably keeps only the NALs of layer 0 and updates len -- confirm
+    SimulateNALLoss (info.sLayerInfo[0].pBsBuf, len, &m_SLostSim, p.pLossSequence, p.bLostPara, iLossIdx, bVCLLoss); // helper defined elsewhere; presumably drops NALs per p.pLossSequence -- confirm
+    rv = decoder_->DecodeFrame2 (info.sLayerInfo[0].pBsBuf, len, pData, &dstBufInfo_);
+    m_LTR_Recover_Request.uiFeedbackType = NO_RECOVERY_REQUSET;
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+    LTRRecoveryRequest (decoder_, encoder_, &m_LTR_Recover_Request, rv, true);
+    LTRMarkFeedback (decoder_, encoder_, &m_LTR_Marking_Feedback, rv);
+    iIdx++;
+  }
+}
+
+
+
+
+class DecodeCrashTestAPI : public ::testing::TestWithParam<EncodeDecodeFileParamBase>, public EncodeDecodeTestBase { // Fixture for the decoder crash test: adds a 1000000-byte scratch buffer on top of EncodeDecodeTestBase.
+ public:
+  void SetUp() { // runs the base SetUp, then allocates the scratch buffer
+    EncodeDecodeTestBase::SetUp();
+    ucBuf_ = NULL;
+    ucBuf_ = new unsigned char [1000000]; // fixed-size buffer; DecoderCrashTest copies the surviving NALs of one access unit into it
+    ASSERT_TRUE (ucBuf_ != NULL);
+  }
+
+  void TearDown() { // runs the base TearDown, then releases the scratch buffer
+    EncodeDecodeTestBase::TearDown();
+    if (NULL != ucBuf_) {
+      delete[] ucBuf_;
+      ucBuf_ = NULL;
+    }
+    ASSERT_TRUE (ucBuf_ == NULL);
+  }
+
+  void prepareParam (int iLayerNum, int iSliceNum, int width, int height, float framerate, SEncParamExt* pParam) { // zeroes *pParam before delegating to the base-class prepareParam
+    memset (pParam, 0, sizeof (SEncParamExt));
+    EncodeDecodeTestBase::prepareParam (iLayerNum, iSliceNum,  width, height, framerate, pParam);
+  }
+
+  void EncodeOneFrame() { // fills buf_ with synthetic data and encodes one frame into info
+    int frameSize = EncPic.iPicWidth * EncPic.iPicHeight * 3 / 2; // width * height * 3 / 2 bytes (presumably one 4:2:0 frame -- confirm)
+    memset (buf_.data(), iRandValue, (frameSize >> 2)); // first quarter of the frame: constant iRandValue
+    memset (buf_.data() + (frameSize >> 2), rand() % 256, (frameSize - (frameSize >> 2))); // remainder: one random byte value, new per call
+    int rv = encoder_->EncodeFrame (&EncPic, &info);
+    ASSERT_TRUE (rv == cmResultSuccess || rv == cmUnknownReason); // cmUnknownReason is tolerated as well as success
+  }
+ protected:
+  unsigned char* ucBuf_; // scratch buffer owned by the fixture (allocated in SetUp, freed in TearDown)
+};
+
+struct EncodeDecodeParamBase { // One encoder configuration used by DecoderCrashTest (see kParamArray).
+  int width; // picture width passed to prepareParam
+  int height; // picture height passed to prepareParam
+  float frameRate; // frame rate passed to prepareParam
+  int iTarBitrate; // copied into iTargetBitrate and iSpatialBitrate by DecoderCrashTest (presumably bits per second -- confirm)
+};
+
+#define NUM_OF_POSSIBLE_RESOLUTION (9) // must equal the number of entries in kParamArray; used as the modulus when picking a random entry
+static const EncodeDecodeParamBase kParamArray[] = { // {width, height, frameRate, iTarBitrate}; each size appears in both orientations except the last
+  {160, 90, 6.0f, 250000},
+  {90, 160, 6.0f, 250000},
+  {320, 180, 12.0f, 500000},
+  {180, 320, 12.0f, 500000},
+  {480, 270, 12.0f, 600000},
+  {270, 480, 12.0f, 600000},
+  {640, 360, 24.0f, 800000},
+  {360, 640, 24.0f, 800000},
+  {1280, 720, 24.0f, 1000000},
+};
+
+//#define DEBUG_FILE_SAVE_CRA
+TEST_F (DecodeCrashTestAPI, DecoderCrashTest) { // Repeatedly encodes synthetic frames with random settings, drops NALs at random, and feeds the rest to the decoder; only the EC option value is checked.
+  uint32_t uiGet;
+  encoder_->Uninitialize();
+
+  //loop over rounds of encode + lossy decode: endless when DEBUG_FILE_SAVE_CRA is defined, 10 rounds otherwise
+  unsigned int uiLoopRound = 0;
+  unsigned char* pucBuf = ucBuf_;
+  int iDecAuSize;
+#ifdef DEBUG_FILE_SAVE_CRA
+  //open file to save tested BS
+  FILE* fDataFile = fopen ("test_crash.264", "wb");
+  FILE* fLenFile = fopen ("test_crash_len.log", "w");
+  int iFileSize = 0;
+#endif
+
+  //set eCurStrategy for one test
+  EParameterSetStrategy eCurStrategy = CONSTANT_ID;
+  switch (rand() % 7) { // values 0, 4 and 5 fall through to default and keep CONSTANT_ID
+  case 1:
+    eCurStrategy = INCREASING_ID;
+    break;
+  case 2:
+    eCurStrategy = SPS_LISTING;
+    break;
+  case 3:
+    eCurStrategy = SPS_LISTING_AND_PPS_INCREASING;
+    break;
+  case 6:
+    eCurStrategy = SPS_PPS_LISTING;
+    break;
+  default:
+    //using the initial value
+    break;
+  }
+
+  do {
+    int iTotalFrameNum = (rand() % 100) + 1; // 1..100 frames in this round
+    int iSeed = rand() % NUM_OF_POSSIBLE_RESOLUTION; // index of the configuration used in this round
+    EncodeDecodeParamBase p = kParamArray[iSeed];
+#ifdef DEBUG_FILE_SAVE_CRA
+    printf ("using param set %d in loop %d\n", iSeed, uiLoopRound);
+#endif
+    //Initialize Encoder
+    prepareParam (1, 1, p.width, p.height, p.frameRate, &param_);
+    param_.iRCMode = RC_TIMESTAMP_MODE;
+    param_.iTargetBitrate = p.iTarBitrate;
+    param_.uiIntraPeriod = 0;
+    param_.eSpsPpsIdStrategy = eCurStrategy;
+    param_.bEnableBackgroundDetection = true;
+    param_.bEnableSceneChangeDetect = (rand() % 3) ? true : false; // true in 2 out of 3 cases
+    param_.bPrefixNalAddingCtrl = (rand() % 2) ? true : false;
+    param_.iEntropyCodingModeFlag = 0;
+    param_.bEnableFrameSkip = true;
+    param_.iMultipleThreadIdc = 0;
+    param_.sSpatialLayers[0].iSpatialBitrate = p.iTarBitrate;
+    param_.sSpatialLayers[0].iMaxSpatialBitrate = p.iTarBitrate << 1; // twice the target bitrate
+    param_.sSpatialLayers[0].sSliceArgument.uiSliceMode = (rand() % 2) ? SM_SIZELIMITED_SLICE : SM_SINGLE_SLICE;
+    if (param_.sSpatialLayers[0].sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE) {
+      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 1400;
+      param_.uiMaxNalSize = 1400;
+    } else {
+      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 0;
+      param_.uiMaxNalSize = 0;
+    }
+
+    int rv = encoder_->InitializeExt (&param_);
+    ASSERT_TRUE (rv == cmResultSuccess);
+    decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
+    EXPECT_EQ (uiGet, (uint32_t) ERROR_CON_SLICE_COPY); //default value should be ERROR_CON_SLICE_COPY
+    int32_t iTraceLevel = WELS_LOG_QUIET;
+    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+
+    //Start for enc/dec
+    int iIdx = 0;
+    unsigned char* pData[3] = { NULL };
+
+    EncodeDecodeFileParamBase pInput; //to conform with old functions
+    pInput.width =  p.width;
+    pInput.height = p.height;
+    pInput.frameRate = p.frameRate;
+    ASSERT_TRUE (prepareEncDecParam (pInput));
+    while (iIdx++ < iTotalFrameNum) { // loop in frame
+      EncodeOneFrame();
+#ifdef DEBUG_FILE_SAVE_CRA
+      //reset file if file size large
+      if ((info.eFrameType == videoFrameTypeIDR) && (iFileSize >= (1 << 25))) {
+        fclose (fDataFile);
+        fDataFile = fopen ("test_crash.264", "wb");
+        iFileSize = 0;
+        decoder_->Uninitialize();
+
+        SDecodingParam decParam;
+        memset (&decParam, 0, sizeof (SDecodingParam));
+        decParam.uiTargetDqLayer = UCHAR_MAX;
+        decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
+        decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+
+        rv = decoder_->Initialize (&decParam);
+        ASSERT_EQ (0, rv);
+      }
+#endif
+      if (info.eFrameType == videoFrameTypeSkip) // nothing to decode for a skipped frame
+        continue;
+      //deal with packets
+      unsigned char* pBsBuf;
+      iDecAuSize = 0;
+      pucBuf = ucBuf_; //init buf start pos for decoder usage
+      for (int iLayerNum = 0; iLayerNum < info.iLayerNum; iLayerNum++) {
+        SLayerBSInfo* pLayerBsInfo = &info.sLayerInfo[iLayerNum];
+        pBsBuf = info.sLayerInfo[iLayerNum].pBsBuf;
+        int iTotalNalCnt = pLayerBsInfo->iNalCount;
+        for (int iNalCnt = 0; iNalCnt < iTotalNalCnt; iNalCnt++) {  //loop in NAL
+          int iPacketSize = pLayerBsInfo->pNalLengthInByte[iNalCnt];
+          //packet loss
+          int iLossRateRange = (uiLoopRound % 100) + 1; //1-100
+          int iLossRate = (rand() % iLossRateRange); // 0..iLossRateRange-1, so round 0 never loses a packet
+          bool bPacketLost = (rand() % 101) > (100 -
+                                               iLossRate);   // [0, (100-iLossRate)] indicates NO LOSS, (100-iLossRate, 100] indicates LOSS
+          if (!bPacketLost) { //no loss
+            memcpy (pucBuf, pBsBuf, iPacketSize); // NOTE(review): no check that the accumulated size stays within the 1000000-byte ucBuf_
+            pucBuf += iPacketSize;
+            iDecAuSize += iPacketSize;
+          }
+#ifdef DEBUG_FILE_SAVE_CRA
+          else {
+            printf ("lost packet size=%d at frame-type=%d at loss rate %d (%d)\n", iPacketSize, info.eFrameType, iLossRate,
+                    iLossRateRange);
+          }
+#endif
+          //update bs info
+          pBsBuf += iPacketSize;
+        } //nal
+      } //layer
+
+#ifdef DEBUG_FILE_SAVE_CRA
+      //save to file
+      fwrite (ucBuf_, 1, iDecAuSize, fDataFile);
+      fflush (fDataFile);
+      iFileSize += iDecAuSize;
+
+      //save to len file
+      unsigned long ulTmp[4];
+      ulTmp[0] = ulTmp[1] = ulTmp[2] = iIdx;
+      ulTmp[3] = iDecAuSize;
+      fwrite (ulTmp, sizeof (unsigned long), 4, fLenFile); // index, timeStamp, data size
+      fflush (fLenFile);
+#endif
+
+      //decode
+      pData[0] = pData[1] = pData[2] = 0;
+      memset (&dstBufInfo_, 0, sizeof (SBufferInfo));
+
+      rv = decoder_->DecodeFrame2 (ucBuf_, iDecAuSize, pData, &dstBufInfo_); // return codes are not checked in this loop
+      rv = decoder_->DecodeFrame2 (NULL, 0, pData, &dstBufInfo_); //reconstruction
+      //guarantee decoder EC status
+      decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
+      EXPECT_EQ (uiGet, (uint32_t) ERROR_CON_SLICE_COPY);
+    } //frame
+    uiLoopRound ++;
+    if (uiLoopRound >= (1 << 30)) // wrap the round counter (only reachable in the endless debug loop)
+      uiLoopRound = 0;
+#ifdef DEBUG_FILE_SAVE_CRA
+    if (uiLoopRound % 100 == 0)
+      printf ("run %d times.\n", uiLoopRound);
+  } while (1); //debug build: loop without an exit condition
+  fclose (fDataFile); // not reached by normal loop exit, since the loop above is while (1)
+  fclose (fLenFile);
+#else
+  }
+  while (uiLoopRound < 10); // regular build: 10 rounds
+#endif
+
+}
+
+const uint32_t kiTotalLayer = 3; //DO NOT CHANGE! number of spatial layers used by ParseOnly_General; also the number of rows in pHashStr
+const uint32_t kiSliceNum = 2; //DO NOT CHANGE! slice count passed to prepareParam by ParseOnly_General
+const uint32_t kiWidth = 160; //DO NOT CHANGE! frame width used by the DecodeParseAPI fixture
+const uint32_t kiHeight = 96; //DO NOT CHANGE! frame height used by the DecodeParseAPI fixture
+const uint32_t kiFrameRate = 12; //DO NOT CHANGE! frame rate used by the ParseOnly tests
+const uint32_t kiFrameNum = 100; //DO NOT CHANGE! number of frames encoded by ParseOnly_General
+const char* const pHashStr[][2] = { //DO NOT CHANGE! expected SHA1 strings, one row per target layer id, two accepted alternatives per row
+  // Allow for different output depending on whether averaging is done
+  // vertically or horizontally first when downsampling.
+  { "caaaa3352ab8614e3a35836f5d7c9a528294e953", "326cc236e9ba5277aedc5cf0865dd4cbd2f89fe0" },
+  { "2dc97661e94515d9947a344127062f82814afc2a", "72f36bb33d190979be88077c6166a09767dd2992" },
+  { "106ec96a90412aabea5c0cfa6bfc654a0b5db33e", "998c2947bccf140bde1e43e29376614038eb7c71" }
+};
+
+class DecodeParseAPI : public ::testing::TestWithParam<EncodeDecodeFileParamBase>, public EncodeDecodeTestBase { // Fixture for the parse-only decoder tests: decoder initialized with bParseOnly = true, input frames read from a YUV file.
+ public:
+  DecodeParseAPI() { // puts all members into a defined zero/NULL state
+    memset (&BsInfo_, 0, sizeof (SParserBsInfo));
+    fYuv_ = NULL;
+    iWidth_ = 0;
+    iHeight_ = 0;
+    memset (&ctx_, 0, sizeof (SHA1Context));
+  }
+  void SetUp() { // re-initializes the decoder in parse-only mode and opens the input YUV file
+    SHA1Reset (&ctx_);
+    EncodeDecodeTestBase::SetUp();
+
+    if (decoder_) // NOTE(review): decoder_ is used unconditionally below, so this guard does not protect the Initialize call
+      decoder_->Uninitialize();
+    SDecodingParam decParam;
+    memset (&decParam, 0, sizeof (SDecodingParam));
+    decParam.uiTargetDqLayer = UCHAR_MAX;
+    decParam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
+    decParam.bParseOnly = true;
+    decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+
+    int rv = decoder_->Initialize (&decParam);
+    ASSERT_EQ (0, rv);
+    memset (&BsInfo_, 0, sizeof (SParserBsInfo));
+    const char* sFileName = "res/CiscoVT2people_160x96_6fps.yuv";
+#if defined(ANDROID_NDK)
+    std::string filename = std::string ("/sdcard/") + sFileName;
+    ASSERT_TRUE ((fYuv_ = fopen (filename.c_str(), "rb")) != NULL);
+#else
+    ASSERT_TRUE ((fYuv_ = fopen (sFileName, "rb")) != NULL);
+#endif
+    iWidth_ = kiWidth;
+    iHeight_ = kiHeight;
+  }
+  void TearDown() { // runs the base TearDown, then closes the input file if it is open
+    EncodeDecodeTestBase::TearDown();
+    if (fYuv_ != NULL) {
+      fclose (fYuv_);
+      fYuv_ = NULL;
+    }
+  }
+
+  bool prepareEncDecParam (const EncodeDecodeFileParamBase p) { // base-class preparation plus a reset of BsInfo_ that keeps its pDstBuff pointer; returns false if the base call fails
+    if (!EncodeDecodeTestBase::prepareEncDecParam (p))
+      return false;
+    unsigned char* pTmpPtr = BsInfo_.pDstBuff; //saved so it can be restored after the memset
+    memset (&BsInfo_, 0, sizeof (SParserBsInfo));
+    BsInfo_.pDstBuff = pTmpPtr;
+    return true;
+  }
+
+  void MockInputData (uint8_t* pData, int32_t iSize) { // perturbs iSize bytes in place: adds a random value in [0, 19] to each byte, wrapping at 8 bits
+    int32_t iCurr = 0;
+    while (iCurr < iSize) {
+      * (pData + iCurr) = (* (pData + iCurr) + (rand() % 20) + 256) & 0x00ff;
+      iCurr++;
+    }
+  }
+
+  void EncodeOneFrame (bool bMock) { // reads one frame from fYuv_ (rewinding at end of file), optionally perturbs it, and encodes it into info
+    int iFrameSize = iWidth_ * iHeight_ * 3 / 2;
+    int iSize = (int) fread (buf_.data(), sizeof (char), iFrameSize, fYuv_);
+    if (feof (fYuv_) || iSize != iFrameSize) { // end of file or short read: start over from the beginning of the file
+      rewind (fYuv_);
+      iSize = (int) fread (buf_.data(), sizeof (char), iFrameSize, fYuv_);
+      ASSERT_TRUE (iSize == iFrameSize);
+    }
+    if (bMock) {
+      MockInputData (buf_.data(), iWidth_ * iHeight_); // only the first width * height bytes are perturbed (presumably the luma plane -- confirm)
+    }
+    int rv = encoder_->EncodeFrame (&EncPic, &info);
+    ASSERT_TRUE (rv == cmResultSuccess || rv == cmUnknownReason); // cmUnknownReason is tolerated as well as success
+  }
+
+  void prepareParam (int iLayerNum, int iSliceNum, int width, int height, float framerate, SEncParamExt* pParam) { // zeroes *pParam before delegating to the base-class prepareParam
+    memset (pParam, 0, sizeof (SEncParamExt));
+    EncodeDecodeTestBase::prepareParam (iLayerNum, iSliceNum,  width, height, framerate, pParam);
+  }
+
+ protected:
+  SParserBsInfo BsInfo_; // output of DecodeParser (NAL count, NAL lengths, destination buffer)
+  FILE* fYuv_; // input YUV file, opened in SetUp and closed in TearDown
+  int iWidth_; // frame width used to compute the number of bytes read per frame
+  int iHeight_; // frame height used to compute the number of bytes read per frame
+  SHA1Context ctx_; // running SHA1 over the parser output
+};
+
+//#define DEBUG_FILE_SAVE_PARSEONLY_GENERAL
+TEST_F (DecodeParseAPI, ParseOnly_General) { // Encodes kiFrameNum frames with kiTotalLayer layers, parses one randomly chosen layer with DecodeParser, and compares the SHA1 of the parser output with pHashStr.
+  EncodeDecodeFileParamBase p;
+  p.width = iWidth_;
+  p.height = iHeight_;
+  p.frameRate = kiFrameRate;
+  p.numframes = kiFrameNum;
+  prepareParam (kiTotalLayer, kiSliceNum, p.width, p.height, p.frameRate, &param_);
+  param_.iSpatialLayerNum = kiTotalLayer;
+  encoder_->Uninitialize();
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == 0);
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  uint32_t uiTargetLayerId = rand() % kiTotalLayer; //run only once
+#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
+  FILE* fDec = fopen ("output.264", "wb");
+  FILE* fEnc = fopen ("enc.264", "wb");
+  FILE* fExtract = fopen ("extract.264", "wb");
+#endif
+  if (uiTargetLayerId < kiTotalLayer) { //should always be true
+    //Start for enc
+    int iLen = 0;
+    ASSERT_TRUE (prepareEncDecParam (p));
+    int iFrame = 0;
+
+    while (iFrame < p.numframes) {
+      //encode
+      EncodeOneFrame (0);
+      //extract target layer data
+      encToDecData (info, iLen);
+#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
+      fwrite (info.sLayerInfo[0].pBsBuf, iLen, 1, fEnc);
+#endif
+      ExtractDidNal (&info, iLen, &m_SLostSim, uiTargetLayerId);
+#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
+      fwrite (info.sLayerInfo[0].pBsBuf, iLen, 1, fExtract);
+#endif
+      //parseonly
+      //BsInfo_.pDstBuff = new unsigned char [1000000];
+      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf, iLen, &BsInfo_);
+      EXPECT_TRUE (rv == 0);
+      EXPECT_TRUE (BsInfo_.iNalNum == 0); // no NALs are reported by the data-carrying call
+      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
+      EXPECT_TRUE (rv == 0);
+      EXPECT_TRUE (BsInfo_.iNalNum != 0); // the follow-up call with NULL input reports the NALs
+      //get final output bs
+      iLen = 0;
+      int i = 0;
+      while (i < BsInfo_.iNalNum) { // total output size = sum of the reported NAL lengths
+        iLen += BsInfo_.pNalLenInByte[i];
+        i++;
+      }
+#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
+      fwrite (BsInfo_.pDstBuff, iLen, 1, fDec);
+#endif
+      SHA1Input (&ctx_, BsInfo_.pDstBuff, iLen);
+      iFrame++;
+    }
+    //calculate final SHA1 value
+    unsigned char digest[SHA_DIGEST_LENGTH];
+    SHA1Result (&ctx_, digest);
+    if (!HasFatalFailure()) {
+      CompareHashAnyOf (digest, pHashStr[uiTargetLayerId], sizeof * pHashStr / sizeof** pHashStr); // last argument: number of alternative hashes per row
+    }
+  } //if (uiTargetLayerId < kiTotalLayer)
+#ifdef DEBUG_FILE_SAVE_PARSEONLY_GENERAL
+  fclose (fEnc);
+  fclose (fExtract);
+  fclose (fDec);
+#endif
+}
+
+//This case covers a single layer with incomplete frame input:
+//the first of the 2 slices is dropped for one randomly chosen picture (never the first picture)
+TEST_F (DecodeParseAPI, ParseOnly_SpecSliceLoss) {
+  int32_t iLayerNum = 1;
+  int32_t iSliceNum = 2;
+  EncodeDecodeFileParamBase p;
+  p.width = iWidth_;
+  p.height = iHeight_;
+  p.frameRate = kiFrameRate;
+  p.numframes = 5;
+  prepareParam (iLayerNum, iSliceNum, p.width, p.height, p.frameRate, &param_);
+  param_.iSpatialLayerNum = iLayerNum;
+  encoder_->Uninitialize();
+  int rv = encoder_->InitializeExt (&param_);
+  ASSERT_TRUE (rv == 0);
+  int32_t iTraceLevel = WELS_LOG_QUIET;
+  encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+  decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+
+  int32_t iMissedPicNum = rand() % (p.numframes - 1) + 1; //frame index in [1, numframes-1]; frame 0 (IDR) is never chosen
+  //Start for enc
+  int iLen = 0;
+  uint32_t uiGet;
+  ASSERT_TRUE (prepareEncDecParam (p));
+  int iFrame = 0;
+
+  while (iFrame < p.numframes) {
+    //encode
+    EncodeOneFrame (0);
+    //parseonly
+    if (iFrame == iMissedPicNum) { //make current frame partly missing
+      //Frame: P, first slice loss
+      int32_t iTotalSliceSize = 0;
+      encToDecSliceData (0, 0, info, iTotalSliceSize); //slice 1 lost; its size is used below as the offset that skips it
+      encToDecSliceData (0, 1, info, iLen); //slice 2
+      decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
+      EXPECT_EQ (uiGet, (uint32_t) ERROR_CON_DISABLE); // presumably parse-only mode overrides the ERROR_CON_SLICE_COPY requested in SetUp -- confirm
+      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf + iTotalSliceSize, iLen, &BsInfo_);
+      EXPECT_TRUE (rv == 0);
+      EXPECT_TRUE (BsInfo_.iNalNum == 0);
+      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
+      EXPECT_TRUE (rv != 0); // the incomplete picture must be reported as an error
+    } else { //normal frame, complete
+      encToDecData (info, iLen);
+      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf, iLen, &BsInfo_);
+      EXPECT_TRUE (rv == 0); //parse correct
+      EXPECT_TRUE (BsInfo_.iNalNum == 0);
+      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
+      if (iFrame < iMissedPicNum) { //correct frames, all OK with output
+        EXPECT_TRUE (rv == 0);
+        EXPECT_TRUE (BsInfo_.iNalNum != 0);
+      } else { //(iFrame > iMissedPicNum), should output nothing as error
+        EXPECT_TRUE (rv != 0);
+        EXPECT_TRUE (BsInfo_.iNalNum == 0);
+      }
+    }
+    iFrame++;
+  } //while
+}
+
+TEST_F (DecodeParseAPI, ParseOnly_SpecStatistics) { //checks the decoder statistics reported while parsing streams of changing resolution
+  //set params
+  int32_t iLayerNum = 1;
+  int32_t iSliceNum = 1;
+  EncodeDecodeFileParamBase p;
+  const int iLoopNum = 10; //number of encode/parse rounds, each with its own resolution setting
+  p.frameRate = kiFrameRate;
+  p.numframes = 2;  //encode 2 frames in each round
+  p.width = iWidth_ = 16;
+  p.height = iHeight_ = 16; //start width/height = 16, modified at the end of every round
+  int iTotalFrmCnt = 0; //frames parsed so far over all rounds
+  for (int i = 0; i < iLoopNum; ++i) {
+    prepareParam (iLayerNum, iSliceNum, p.width, p.height, p.frameRate, &param_);
+    param_.iSpatialLayerNum = iLayerNum;
+    param_.sSpatialLayers[0].iDLayerQp = 40; //to prevent an encoding failure caused by a too limited size
+    encoder_->Uninitialize(); //only the encoder is re-initialized per round; the decoder is kept as it is
+    int rv = encoder_->InitializeExt (&param_);
+    ASSERT_TRUE (rv == 0);
+    int32_t iTraceLevel = WELS_LOG_QUIET;
+    rv = encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    ASSERT_TRUE (rv == 0);
+    rv = decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    ASSERT_TRUE (rv == 0);
+    //Start encoding
+    int iLen = 0;
+    ASSERT_TRUE (prepareEncDecParam (p));
+    int iFrame = 0;
+    while (iFrame < p.numframes) {
+      EncodeOneFrame (0);
+      encToDecData (info, iLen);
+      iFrame++;
+      iTotalFrmCnt++;
+      rv = decoder_->DecodeParser (info.sLayerInfo[0].pBsBuf, iLen, &BsInfo_);
+      ASSERT_TRUE (rv == 0);
+      ASSERT_TRUE (BsInfo_.iNalNum == 0); //no output is expected from the call that feeds the data
+      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_);
+      ASSERT_TRUE (rv == 0);
+      ASSERT_TRUE (BsInfo_.iNalNum != 0); //output is expected from the following (NULL, 0) call
+      SDecoderStatistics sDecStat;
+      rv = decoder_->GetOption (DECODER_OPTION_GET_STATISTICS, &sDecStat);
+      ASSERT_TRUE (rv == 0);
+      uint32_t uiProfile, uiLevel;
+      rv = decoder_->GetOption (DECODER_OPTION_PROFILE, &uiProfile);
+      ASSERT_TRUE (rv == 0);
+      rv = decoder_->GetOption (DECODER_OPTION_LEVEL, &uiLevel);
+      ASSERT_TRUE (rv == 0);
+
+      ASSERT_EQ (sDecStat.uiWidth, (unsigned int) p.width);
+      ASSERT_EQ (sDecStat.uiHeight, (unsigned int) p.height);
+      ASSERT_EQ (sDecStat.uiResolutionChangeTimes, (unsigned int) (i + 1)); //one resolution change expected per round so far
+      EXPECT_EQ (sDecStat.iCurrentActiveSpsId, 0);
+      EXPECT_EQ (sDecStat.iCurrentActivePpsId, 0);
+      ASSERT_EQ (sDecStat.uiDecodedFrameCount, (unsigned int) iTotalFrmCnt); //accumulates over rounds: the decoder is not re-initialized in this loop
+      ASSERT_EQ (sDecStat.uiProfile, uiProfile); //statistics must agree with the separately queried profile/level options
+      ASSERT_EQ (sDecStat.uiLevel, uiLevel);
+      EXPECT_TRUE (sDecStat.fActualAverageFrameSpeedInMs != 0.);
+      EXPECT_TRUE (sDecStat.fAverageFrameSpeedInMs != 0.);
+      EXPECT_TRUE (sDecStat.iAvgLumaQp != 0);
+      EXPECT_EQ (sDecStat.uiIDRCorrectNum, (unsigned int) (i + 1)); //one correct IDR expected per round so far
+    }
+    //set next width & height
+    p.width += 16;
+    p.height += 16;
+    if ((unsigned int) p.width > kiWidth) //exceeds max frame size: wrap back to 16
+      p.width = 16;
+    if ((unsigned int) p.height > kiHeight)
+      p.height = 16;
+    iWidth_ = p.width;
+    iHeight_ = p.height;
+  }
+}
+
+
+//Fixture for the parse-only crash tests: DecodeParseAPI at 1280x720 plus a scratch bitstream buffer
+class DecodeParseCrashAPI : public DecodeParseAPI {
+ public:
+  DecodeParseCrashAPI() {
+  }
+  void SetUp() {
+    DecodeParseAPI::SetUp();
+    iWidth_ = 1280; //overrides the size fields after the base fixture has been set up
+    iHeight_ = 720;
+
+    ucBuf_ = NULL;
+    ucBuf_ = new unsigned char[1000000]; //1000000-byte buffer in which ParseOnlyCrash_General assembles the data fed to the decoder
+    ASSERT_TRUE (ucBuf_ != NULL);
+
+  }
+  void TearDown() {
+    DecodeParseAPI::TearDown(); //the base fixture is torn down before the buffer is released
+    if (NULL != ucBuf_) {
+      delete[] ucBuf_;
+      ucBuf_ = NULL;
+    }
+    ASSERT_TRUE (ucBuf_ == NULL);
+  }
+
+ protected:
+  unsigned char* ucBuf_; //allocated in SetUp(), released in TearDown()
+};
+
+//#define DEBUG_FILE_SAVE_PARSE_CRA1
+TEST_F (DecodeParseCrashAPI, ParseOnlyCrash_General) { //feeds encoder output with random NAL loss to the parse-only decoder
+  if (fYuv_) //replace the input file opened so far with the 720p sequence below
+    fclose (fYuv_);
+  const char* sFileName = "res/Cisco_Absolute_Power_1280x720_30fps.yuv";
+#if defined(ANDROID_NDK)
+  std::string filename = std::string ("/sdcard/") + sFileName;
+  ASSERT_TRUE ((fYuv_ = fopen (filename.c_str(), "rb")) != NULL);
+#else
+  ASSERT_TRUE ((fYuv_ = fopen (sFileName, "rb")) != NULL);
+#endif
+  uint32_t uiGet;
+  encoder_->Uninitialize();
+  //do tests until crash (endless only with DEBUG_FILE_SAVE_PARSE_CRA1; otherwise the loop body runs once)
+  unsigned int uiLoopRound = 0;
+  unsigned char* pucBuf = ucBuf_; //write position inside ucBuf_
+  int iDecAuSize; //number of bytes of the current frame that survived the simulated loss
+#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
+  //open file to save tested BS
+  FILE* fDataFile = fopen ("test_parseonly_crash.264", "wb");
+  FILE* fLenFile = fopen ("test_parseonly_crash_len.log", "w");
+  int iFileSize = 0;
+#endif
+
+  do {
+#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
+    int iTotalFrameNum = (rand() % 1200) + 1; //1..1200 frames per round
+#else
+    int iTotalFrameNum = (rand() % 100) + 1; //1..100 frames per round
+#endif
+    EncodeDecodeParamBase p = kParamArray[8]; //720p by default
+
+    //Initialize Encoder
+    prepareParam (1, 4, p.width, p.height, p.frameRate, &param_);
+    param_.iRCMode = RC_TIMESTAMP_MODE;
+    param_.iTargetBitrate = p.iTarBitrate;
+    param_.uiIntraPeriod = 0;
+    param_.eSpsPpsIdStrategy = CONSTANT_ID;
+    param_.bEnableBackgroundDetection = true;
+    param_.bEnableSceneChangeDetect = (rand() % 3) ? true : false; //enabled in 2 of 3 cases
+    param_.bPrefixNalAddingCtrl = 0;// (rand() % 2) ? true : false;
+    param_.iEntropyCodingModeFlag = 0;
+    param_.bEnableFrameSkip = true;
+    param_.iMultipleThreadIdc = 0;
+    param_.sSpatialLayers[0].iSpatialBitrate = p.iTarBitrate;
+    param_.sSpatialLayers[0].iMaxSpatialBitrate = p.iTarBitrate << 1;
+    param_.sSpatialLayers[0].sSliceArgument.uiSliceMode =
+      SM_FIXEDSLCNUM_SLICE; // (rand() % 2) ? SM_SIZELIMITED_SLICE : SM_SINGLE_SLICE;
+    if (param_.sSpatialLayers[0].sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE) { //not taken while the mode is fixed to SM_FIXEDSLCNUM_SLICE above
+      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 1400;
+      param_.uiMaxNalSize = 1400;
+    } else {
+      param_.sSpatialLayers[0].sSliceArgument.uiSliceSizeConstraint = 0;
+      param_.uiMaxNalSize = 0;
+    }
+
+    int rv = encoder_->InitializeExt (&param_);
+    ASSERT_TRUE (rv == cmResultSuccess);
+    decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
+    EXPECT_EQ (uiGet, (uint32_t)ERROR_CON_DISABLE); //this test expects ERROR_CON_DISABLE here (an older note naming ERROR_CON_SLICE_COPY did not match the check)
+    int32_t iTraceLevel = WELS_LOG_QUIET;
+    encoder_->SetOption (ENCODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+    decoder_->SetOption (DECODER_OPTION_TRACE_LEVEL, &iTraceLevel);
+
+    //Start for enc/dec
+    int iIdx = 0;
+    unsigned char* pData[3] = { NULL };
+
+    EncodeDecodeFileParamBase pInput; //to conform with old functions
+    pInput.width = p.width;
+    pInput.height = p.height;
+    pInput.frameRate = p.frameRate;
+    prepareEncDecParam (pInput); //NOTE(review): return value is not checked and pInput.numframes is not assigned here
+    while (iIdx++ < iTotalFrameNum) { // loop in frame
+      EncodeOneFrame (1);
+#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
+      //reset the dump files and the decoder at an IDR once the dump has reached 32 MiB (1 << 25 bytes)
+      if ((info.eFrameType == videoFrameTypeIDR) && (iFileSize >= (1 << 25))) {
+        fclose (fDataFile);
+        fclose (fLenFile);
+        fDataFile = fopen ("test_parseonly_crash.264", "wb");
+        fLenFile = fopen ("test_parseonly_crash_len.log", "w");
+        iFileSize = 0;
+        decoder_->Uninitialize();
+
+        SDecodingParam decParam;
+        memset (&decParam, 0, sizeof (SDecodingParam));
+        decParam.uiTargetDqLayer = UCHAR_MAX;
+        decParam.eEcActiveIdc = ERROR_CON_DISABLE;
+        decParam.bParseOnly = true;
+        decParam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
+
+        rv = decoder_->Initialize (&decParam);
+        ASSERT_EQ (0, rv);
+      }
+#endif
+      if (info.eFrameType == videoFrameTypeSkip) //nothing to feed to the decoder for a skipped frame
+        continue;
+      //deal with packets
+      unsigned char* pBsBuf;
+      iDecAuSize = 0;
+      pucBuf = ucBuf_; //init buf start pos for decoder usage
+      for (int iLayerNum = 0; iLayerNum < info.iLayerNum; iLayerNum++) {
+        SLayerBSInfo* pLayerBsInfo = &info.sLayerInfo[iLayerNum];
+        pBsBuf = info.sLayerInfo[iLayerNum].pBsBuf;
+        int iTotalNalCnt = pLayerBsInfo->iNalCount;
+        for (int iNalCnt = 0; iNalCnt < iTotalNalCnt; iNalCnt++) {  //loop in NAL
+          int iPacketSize = pLayerBsInfo->pNalLengthInByte[iNalCnt];
+          //packet loss
+          int iLossRateRange = (uiLoopRound % 20) + 1; //1-20
+          int iLossRate = (rand() % iLossRateRange); //0 .. iLossRateRange - 1
+          bool bPacketLost = (rand() % 101) > (100 -
+                                               iLossRate);   // [0, (100-iLossRate)] indicates NO LOSS, (100-iLossRate, 100] indicates LOSS
+          if (!bPacketLost) { //no loss: append this NAL to the data for the decoder
+            memcpy (pucBuf, pBsBuf, iPacketSize);
+            pucBuf += iPacketSize;
+            iDecAuSize += iPacketSize;
+          }
+          //update bs info: advance in the encoder output whether or not the NAL was kept
+          pBsBuf += iPacketSize;
+        } //nal
+      } //layer
+
+#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
+      //save to file
+      if (iDecAuSize != 0) {
+        fwrite (ucBuf_, 1, iDecAuSize, fDataFile);
+        fflush (fDataFile);
+        iFileSize += iDecAuSize;
+      }
+
+      //save to len file
+      unsigned long ulTmp[4];
+      ulTmp[0] = ulTmp[1] = ulTmp[2] = iIdx;
+      ulTmp[3] = iDecAuSize;
+      fwrite (ulTmp, sizeof (unsigned long), 4, fLenFile); // index, timeStamp, data size
+      fflush (fLenFile);
+#endif
+
+      //decode
+      pData[0] = pData[1] = pData[2] = 0; //pData is only reset here; it is not passed to any call in this test
+      memset (&BsInfo_, 0, sizeof (SParserBsInfo));
+
+      rv = decoder_->DecodeParser (ucBuf_, iDecAuSize, &BsInfo_); //rv is not asserted: with lost NALs errors may be returned (presumably only crash-freedom matters)
+      rv = decoder_->DecodeParser (NULL, 0, &BsInfo_); //reconstruction
+      //guarantee decoder EC status
+      decoder_->GetOption (DECODER_OPTION_ERROR_CON_IDC, &uiGet);
+      EXPECT_EQ (uiGet, (uint32_t)ERROR_CON_DISABLE);
+    } //frame
+    uiLoopRound++;
+    if (uiLoopRound >= (1 << 30)) //wrap the round counter
+      uiLoopRound = 0;
+#ifdef DEBUG_FILE_SAVE_PARSE_CRA1
+    if (uiLoopRound % 10 == 0)
+      printf ("run %d times.\n", uiLoopRound);
+  } while (1); //debug build: repeat forever, so the fclose calls below are never reached
+  fclose (fDataFile);
+  fclose (fLenFile);
+#else
+  }
+  while (0); //normal build: a single round
+#endif
+
+}
+
--- a/test/api/decoder_test.cpp
+++ b/test/api/decoder_test.cpp
@@ -1,136 +1,140 @@
-#include <gtest/gtest.h>
-#include "utils/HashFunctions.h"
-#include "BaseDecoderTest.h"
-#include <string>
-
-static void UpdateHashFromPlane (SHA1Context* ctx, const uint8_t* plane,
-                                 int width, int height, int stride) {
-  for (int i = 0; i < height; i++) {
-    SHA1Input (ctx, plane, width);
-    plane += stride;
-  }
-}
-
-class DecoderCapabilityTest : public ::testing::Test {
- public:
-  virtual void SetUp() {}
-  virtual void TearDown() {}
-};
-
-TEST_F (DecoderCapabilityTest, JustInit) {
-  SDecoderCapability sDecCap;
-  int iRet = WelsGetDecoderCapability (&sDecCap);
-  ASSERT_TRUE (iRet == 0);
-  EXPECT_EQ (sDecCap.iProfileIdc, 66);
-  EXPECT_EQ (sDecCap.iProfileIop, 0xE0);
-  EXPECT_EQ (sDecCap.iLevelIdc, 32);
-  EXPECT_EQ (sDecCap.iMaxMbps, 216000);
-  EXPECT_EQ (sDecCap.iMaxFs, 5120);
-  EXPECT_EQ (sDecCap.iMaxCpb, 20000);
-  EXPECT_EQ (sDecCap.iMaxDpb, 20480);
-  EXPECT_EQ (sDecCap.iMaxBr, 20000);
-  EXPECT_EQ (sDecCap.bRedPicCap, false);
-}
-
-
-class DecoderInitTest : public ::testing::Test, public BaseDecoderTest {
- public:
-  virtual void SetUp() {
-    BaseDecoderTest::SetUp();
-  }
-  virtual void TearDown() {
-    BaseDecoderTest::TearDown();
-  }
-};
-
-TEST_F (DecoderInitTest, JustInit) {}
-
-struct FileParam {
-  const char* fileName;
-  const char* hashStr;
-};
-
-class DecoderOutputTest : public ::testing::WithParamInterface<FileParam>,
-  public DecoderInitTest, public BaseDecoderTest::Callback {
- public:
-  virtual void SetUp() {
-    DecoderInitTest::SetUp();
-    if (HasFatalFailure()) {
-      return;
-    }
-    SHA1Reset (&ctx_);
-  }
-  virtual void onDecodeFrame (const Frame& frame) {
-    const Plane& y = frame.y;
-    const Plane& u = frame.u;
-    const Plane& v = frame.v;
-    UpdateHashFromPlane (&ctx_, y.data, y.width, y.height, y.stride);
-    UpdateHashFromPlane (&ctx_, u.data, u.width, u.height, u.stride);
-    UpdateHashFromPlane (&ctx_, v.data, v.width, v.height, v.stride);
-  }
- protected:
-  SHA1Context ctx_;
-};
-
-TEST_P (DecoderOutputTest, CompareOutput) {
-  FileParam p = GetParam();
-#if defined(ANDROID_NDK)
-  std::string filename = std::string ("/sdcard/") + p.fileName;
-  DecodeFile (filename.c_str(), this);
-#else
-  DecodeFile (p.fileName, this);
-#endif
-
-  unsigned char digest[SHA_DIGEST_LENGTH];
-  SHA1Result (&ctx_, digest);
-  if (!HasFatalFailure()) {
-    CompareHash (digest, p.hashStr);
-  }
-}
-static const FileParam kFileParamArray[] = {
-  {"res/Adobe_PDF_sample_a_1024x768_50Frms.264", "9aa9a4d9598eb3e1093311826844f37c43e4c521"},
-  {"res/BA1_FT_C.264", "418d152fb85709b6f172799dcb239038df437cfa"},
-  {"res/BA1_Sony_D.jsv", "d94b5ceed5686a03ea682b53d415dee999d27eb6"},
-  {"res/BAMQ1_JVC_C.264", "613cf662c23e5d9e1d7da7fe880a3c427411d171"},
-  {"res/BAMQ2_JVC_C.264", "11bcf3713f520e606a8326d37e00e5fd6c9fd4a0"},
-  {"res/BA_MW_D.264", "afd7a9765961ca241bb4bdf344b31397bec7465a"},
-  {"res/BANM_MW_D.264", "92d924a857a1a7d7d9b224eaa3887830f15dee7f"},
-  {"res/BASQP1_Sony_C.jsv", "3986c8c9d2876d2f0748b925101b152c6ec8b811"},
-  {"res/CI1_FT_B.264", "cbfec15e17a504678b19a1191992131c92a1ac26"},
-  {"res/CI_MW_D.264", "289f29a103c8d95adf2909c646466904be8b06d7"},
-  {"res/CVFC1_Sony_C.jsv", "4641abd7419a5580b97f16e83fd1d566339229d0"},
-  {"res/CVPCMNL1_SVA_C.264", "c2b0d964de727c64b9fccb58f63b567c82bda95a"},
-  {"res/LS_SVA_D.264", "72118f4d1674cf14e58bed7e67cb3aeed3df62b9"},
-  {"res/MIDR_MW_D.264", "9467030f4786f75644bf06a7fc809c36d1959827"},
-  {"res/MPS_MW_A.264", "67f1cfbef0e8025ed60dedccf8d9558d0636be5f"},
-  {"res/MR1_BT_A.h264", "6e585f8359667a16b03e5f49a06f5ceae8d991e0"},
-  {"res/MR1_MW_A.264", "d9e2bf34e9314dcc171ddaea2c5015d0421479f2"},
-  {"res/MR2_MW_A.264", "628b1d4eff04c2d277f7144e23484957dad63cbe"},
-  {"res/MR2_TANDBERG_E.264", "74d618bc7d9d41998edf4c85d51aa06111db6609"},
-  {"res/NL1_Sony_D.jsv", "e401e30669938443c2f02522fd4d5aa1382931a0"},
-  {"res/NLMQ1_JVC_C.264", "f3265c6ddf8db1b2bf604d8a2954f75532e28cda"},
-  {"res/NLMQ2_JVC_C.264", "350ae86ef9ba09390d63a09b7f9ff54184109ca8"},
-  {"res/NRF_MW_E.264", "20732198c04cd2591350a361e4510892f6eed3f0"},
-  {"res/QCIF_2P_I_allIPCM.264", "8724c0866ebdba7ebb7209a0c0c3ae3ae38a0240"},
-  {"res/SVA_BA1_B.264", "c4543b24823b16c424c673616c36c7f537089b2d"},
-  {"res/SVA_BA2_D.264", "98ff2d67860462d8d8bcc9352097c06cc401d97e"},
-  {"res/SVA_Base_B.264", "91f514d81cd33de9f6fbf5dbefdb189cc2e7ecf4"},
-  {"res/SVA_CL1_E.264", "4fe09ab6cdc965ea10a20f1d6dd38aca954412bb"},
-  {"res/SVA_FM1_E.264", "fad08c4ff7cf2307b6579853d0f4652fc26645d3"},
-  {"res/SVA_NL1_B.264", "6d63f72a0c0d833b1db0ba438afff3b4180fb3e6"},
-  {"res/SVA_NL2_E.264", "70453ef8097c94dd190d6d2d1d5cb83c67e66238"},
-  {"res/SarVui.264", "98ff2d67860462d8d8bcc9352097c06cc401d97e"},
-  {"res/Static.264", "91dd4a7a796805b2cd015cae8fd630d96c663f42"},
-  {"res/Zhling_1280x720.264", "ad99f5eaa2d73ae3840e7da67313de8cfc866ce6"},
-  {"res/sps_subsetsps_bothVUI.264", "d3a47032eb5dcc1963343a68e9bea12435bf1e4c"},
-  {"res/test_cif_I_CABAC_PCM.264", "95fdf21470d3bbcf95505abb2164042063a79d98"},
-  {"res/test_cif_I_CABAC_slice.264", "19121bc67f2b13fb8f030504fc0827e1ac6d0fdb"},
-  {"res/test_cif_P_CABAC_slice.264", "521bbd0ba2422369b724c7054545cf107a56f959"},
-  {"res/test_qcif_cabac.264", "587d1d05943f3cd416bf69469975fdee05361e69"},
-  {"res/test_scalinglist_jm.264", "f690a3af2896a53360215fb5d35016bfd41499b3"},
-  {"res/test_vd_1d.264", "5827d2338b79ff82cd091c707823e466197281d3"},
-  {"res/test_vd_rc.264", "eea02e97bfec89d0418593a8abaaf55d02eaa1ca"},
-};
-
-INSTANTIATE_TEST_CASE_P (DecodeFile, DecoderOutputTest,
-                         ::testing::ValuesIn (kFileParamArray));
+#include <gtest/gtest.h>
+#include "utils/HashFunctions.h"
+#include "BaseDecoderTest.h"
+#include <string>
+
+static void UpdateHashFromPlane (SHA1Context* ctx, const uint8_t* plane,
+                                 int width, int height, int stride) { //feeds height rows of width bytes each into ctx
+  for (int i = 0; i < height; i++) {
+    SHA1Input (ctx, plane, width); //only the first width bytes of the row are hashed
+    plane += stride; //step to the next row; bytes between width and stride are never hashed
+  }
+}
+
+class DecoderCapabilityTest : public ::testing::Test { //fixture without any state; SetUp/TearDown do nothing
+ public:
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+};
+
+TEST_F (DecoderCapabilityTest, JustInit) { //pins the values reported by WelsGetDecoderCapability
+  SDecoderCapability sDecCap;
+  int iRet = WelsGetDecoderCapability (&sDecCap);
+  ASSERT_TRUE (iRet == 0); //stop here on failure: sDecCap is not initialized by this test
+  EXPECT_EQ (sDecCap.iProfileIdc, 66); //presumably the H.264 Baseline profile_idc - TODO confirm
+  EXPECT_EQ (sDecCap.iProfileIop, 0xE0);
+  EXPECT_EQ (sDecCap.iLevelIdc, 32); //presumably level 3.2 - TODO confirm
+  EXPECT_EQ (sDecCap.iMaxMbps, 216000);
+  EXPECT_EQ (sDecCap.iMaxFs, 5120);
+  EXPECT_EQ (sDecCap.iMaxCpb, 20000);
+  EXPECT_EQ (sDecCap.iMaxDpb, 20480);
+  EXPECT_EQ (sDecCap.iMaxBr, 20000);
+  EXPECT_EQ (sDecCap.bRedPicCap, false);
+}
+
+
+class DecoderInitTest : public ::testing::Test, public BaseDecoderTest { //gtest fixture that forwards SetUp/TearDown to BaseDecoderTest
+ public:
+  virtual void SetUp() {
+    BaseDecoderTest::SetUp();
+  }
+  virtual void TearDown() {
+    BaseDecoderTest::TearDown();
+  }
+};
+
+TEST_F (DecoderInitTest, JustInit) {} //empty body: only the fixture's SetUp/TearDown are exercised
+
+struct FileParam { //one parameter set of DecoderOutputTest
+  const char* fileName; //path of the bitstream passed to DecodeFile
+  const char* hashStr; //expected hash string, compared with the SHA1 digest of the decoded planes
+};
+
+class DecoderOutputTest : public ::testing::WithParamInterface<FileParam>,
+  public DecoderInitTest, public BaseDecoderTest::Callback { //parameterized fixture that hashes every frame delivered to onDecodeFrame
+ public:
+  virtual void SetUp() {
+    DecoderInitTest::SetUp();
+    if (HasFatalFailure()) { //base set-up failed: leave the hash context untouched
+      return;
+    }
+    SHA1Reset (&ctx_);
+  }
+  virtual void onDecodeFrame (const Frame& frame) { //adds the Y, U and V planes of the frame, in that order, to the running hash
+    const Plane& y = frame.y;
+    const Plane& u = frame.u;
+    const Plane& v = frame.v;
+    UpdateHashFromPlane (&ctx_, y.data, y.width, y.height, y.stride);
+    UpdateHashFromPlane (&ctx_, u.data, u.width, u.height, u.stride);
+    UpdateHashFromPlane (&ctx_, v.data, v.width, v.height, v.stride);
+  }
+ protected:
+  SHA1Context ctx_; //running SHA1 state over all frames of one test
+};
+
+TEST_P (DecoderOutputTest, CompareOutput) { //decodes the parameter's file and compares the accumulated hash with the expected one
+  FileParam p = GetParam();
+#if defined(ANDROID_NDK)
+  std::string filename = std::string ("/sdcard/") + p.fileName; //on Android the resources are looked up under /sdcard/
+  DecodeFile (filename.c_str(), this);
+#else
+  DecodeFile (p.fileName, this); //this fixture is passed as the callback object
+#endif
+
+  unsigned char digest[SHA_DIGEST_LENGTH];
+  SHA1Result (&ctx_, digest);
+  if (!HasFatalFailure()) { //compare only if no fatal failure has been recorded so far
+    CompareHash (digest, p.hashStr);
+  }
+}
+static const FileParam kFileParamArray[] = { //{bitstream file, expected hash string of its decoded output}
+  {"res/Adobe_PDF_sample_a_1024x768_50Frms.264", "9aa9a4d9598eb3e1093311826844f37c43e4c521"},
+  {"res/BA1_FT_C.264", "418d152fb85709b6f172799dcb239038df437cfa"},
+  {"res/BA1_Sony_D.jsv", "d94b5ceed5686a03ea682b53d415dee999d27eb6"},
+  {"res/BAMQ1_JVC_C.264", "613cf662c23e5d9e1d7da7fe880a3c427411d171"},
+  {"res/BAMQ2_JVC_C.264", "11bcf3713f520e606a8326d37e00e5fd6c9fd4a0"},
+  {"res/BA_MW_D.264", "afd7a9765961ca241bb4bdf344b31397bec7465a"},
+  {"res/BANM_MW_D.264", "92d924a857a1a7d7d9b224eaa3887830f15dee7f"},
+  {"res/BASQP1_Sony_C.jsv", "3986c8c9d2876d2f0748b925101b152c6ec8b811"},
+  {"res/CI1_FT_B.264", "cbfec15e17a504678b19a1191992131c92a1ac26"},
+  {"res/CI_MW_D.264", "289f29a103c8d95adf2909c646466904be8b06d7"},
+  {"res/CVFC1_Sony_C.jsv", "4641abd7419a5580b97f16e83fd1d566339229d0"},
+  {"res/CVPCMNL1_SVA_C.264", "c2b0d964de727c64b9fccb58f63b567c82bda95a"},
+  {"res/LS_SVA_D.264", "72118f4d1674cf14e58bed7e67cb3aeed3df62b9"},
+  {"res/MIDR_MW_D.264", "9467030f4786f75644bf06a7fc809c36d1959827"},
+  {"res/MPS_MW_A.264", "67f1cfbef0e8025ed60dedccf8d9558d0636be5f"},
+  {"res/MR1_BT_A.h264", "6e585f8359667a16b03e5f49a06f5ceae8d991e0"},
+  {"res/MR1_MW_A.264", "d9e2bf34e9314dcc171ddaea2c5015d0421479f2"},
+  {"res/MR2_MW_A.264", "628b1d4eff04c2d277f7144e23484957dad63cbe"},
+  {"res/MR2_TANDBERG_E.264", "74d618bc7d9d41998edf4c85d51aa06111db6609"},
+  {"res/NL1_Sony_D.jsv", "e401e30669938443c2f02522fd4d5aa1382931a0"},
+  {"res/NLMQ1_JVC_C.264", "f3265c6ddf8db1b2bf604d8a2954f75532e28cda"},
+  {"res/NLMQ2_JVC_C.264", "350ae86ef9ba09390d63a09b7f9ff54184109ca8"},
+  {"res/NRF_MW_E.264", "20732198c04cd2591350a361e4510892f6eed3f0"},
+  {"res/QCIF_2P_I_allIPCM.264", "8724c0866ebdba7ebb7209a0c0c3ae3ae38a0240"},
+  {"res/SVA_BA1_B.264", "c4543b24823b16c424c673616c36c7f537089b2d"},
+  {"res/SVA_BA2_D.264", "98ff2d67860462d8d8bcc9352097c06cc401d97e"},
+  {"res/SVA_Base_B.264", "91f514d81cd33de9f6fbf5dbefdb189cc2e7ecf4"},
+  {"res/SVA_CL1_E.264", "4fe09ab6cdc965ea10a20f1d6dd38aca954412bb"},
+  {"res/SVA_FM1_E.264", "fad08c4ff7cf2307b6579853d0f4652fc26645d3"},
+  {"res/SVA_NL1_B.264", "6d63f72a0c0d833b1db0ba438afff3b4180fb3e6"},
+  {"res/SVA_NL2_E.264", "70453ef8097c94dd190d6d2d1d5cb83c67e66238"},
+  {"res/SarVui.264", "98ff2d67860462d8d8bcc9352097c06cc401d97e"},
+  {"res/Static.264", "91dd4a7a796805b2cd015cae8fd630d96c663f42"},
+  {"res/Zhling_1280x720.264", "ad99f5eaa2d73ae3840e7da67313de8cfc866ce6"},
+  {"res/sps_subsetsps_bothVUI.264", "d3a47032eb5dcc1963343a68e9bea12435bf1e4c"},
+  {"res/test_cif_I_CABAC_PCM.264", "95fdf21470d3bbcf95505abb2164042063a79d98"},
+  {"res/test_cif_I_CABAC_slice.264", "19121bc67f2b13fb8f030504fc0827e1ac6d0fdb"},
+  {"res/test_cif_P_CABAC_slice.264", "521bbd0ba2422369b724c7054545cf107a56f959"},
+  {"res/test_qcif_cabac.264", "587d1d05943f3cd416bf69469975fdee05361e69"},
+  {"res/test_scalinglist_jm.264", "f690a3af2896a53360215fb5d35016bfd41499b3"},
+  {"res/test_vd_1d.264", "5827d2338b79ff82cd091c707823e466197281d3"},
+  {"res/test_vd_rc.264", "eea02e97bfec89d0418593a8abaaf55d02eaa1ca"},
+  {"res/HighProfile_B_Frame_1920x1080p_30fps.h264", "50c5b8d175598c1d9e604542c4160fa1c6639adc"}, //the entries from here on are the ones this change adds to the table
+  {"res/HighProfile_B_Frame_1920x1080p_2397fps.h264", "16967cc21b6853b651cfd0a1a858eeec8565836a"},
+  {"res/HighProfile_B_Frame_1280x544_2397p.h264", "8567f85b1162b70bb2ccc49fd16d8aa27523efcf"},
+  {"res/HighProfile_B_Frame_1280x720_2397p.h264", "5a3176bd9bc1af2a5d58e880088434ee57c57602"},
+};
+
+INSTANTIATE_TEST_CASE_P (DecodeFile, DecoderOutputTest,
+                         ::testing::ValuesIn (kFileParamArray)); //instantiates DecoderOutputTest once per kFileParamArray entry
--- a/test/decoder/DecUT_DeblockCommon.cpp
+++ b/test/decoder/DecUT_DeblockCommon.cpp
@@ -925,7 +925,7 @@
   sDqLayer.pLumaQp = iLumaQP;
   sDqLayer.pChromaQp = iChromaQP;
 
-  int16_t iMbType[2];
+  uint32_t iMbType[2];
   sDqLayer.pMbType = iMbType;
   sDqLayer.pMbType[0] = MB_TYPE_INTRA4x4;
   sDqLayer.pMbType[1] = MB_TYPE_INTRA4x4;
--- a/test/decoder/DecUT_PredMv.cpp
+++ b/test/decoder/DecUT_PredMv.cpp
@@ -161,7 +161,7 @@
 
 #define TEST_MV_PRED \
   AnchorPredMv (sAncMvPred.iMvArray,  sAncMvPred.iRefIdxArray, iIndex, iBlockWidth, iRef,  sAncMvPred.iMvp); \
-  PredMv (sWelsMvPred.iMvArray, sWelsMvPred.iRefIdxArray, iIndex, iBlockWidth, iRef, sWelsMvPred.iMvp); \
+  PredMv (sWelsMvPred.iMvArray, sWelsMvPred.iRefIdxArray, LIST_0, iIndex, iBlockWidth, iRef, sWelsMvPred.iMvp); \
   bOK = ((sAncMvPred.iMvp[0] == sWelsMvPred.iMvp[0]) && (sAncMvPred.iMvp[1] == sWelsMvPred.iMvp[1])); \
   EXPECT_EQ (bOK, true);
 
@@ -236,7 +236,7 @@
     iRef = (rand() % 18) - 2; //-2~15
     INIT_MV_DATA;
     AnchorPredInter16x8Mv (sAncMvPred.iMvArray,  sAncMvPred.iRefIdxArray, iIndex, iRef,  sAncMvPred.iMvp);
-    PredInter16x8Mv (sWelsMvPred.iMvArray, sWelsMvPred.iRefIdxArray, iIndex, iRef, sWelsMvPred.iMvp);
+    PredInter16x8Mv (sWelsMvPred.iMvArray, sWelsMvPred.iRefIdxArray, LIST_0, iIndex, iRef, sWelsMvPred.iMvp);
     bOK = ((sAncMvPred.iMvp[0] == sWelsMvPred.iMvp[0]) && (sAncMvPred.iMvp[1] == sWelsMvPred.iMvp[1]));
     EXPECT_EQ (bOK, true);
   }
@@ -255,7 +255,7 @@
     iRef = (rand() % 18) - 2; //-2~15
     INIT_MV_DATA;
     AnchorPredInter8x16Mv (sAncMvPred.iMvArray,  sAncMvPred.iRefIdxArray, iIndex, iRef,  sAncMvPred.iMvp);
-    PredInter8x16Mv (sWelsMvPred.iMvArray, sWelsMvPred.iRefIdxArray, iIndex, iRef, sWelsMvPred.iMvp);
+    PredInter8x16Mv (sWelsMvPred.iMvArray, sWelsMvPred.iRefIdxArray, LIST_0, iIndex, iRef, sWelsMvPred.iMvp);
     bOK = ((sAncMvPred.iMvp[0] == sWelsMvPred.iMvp[0]) && (sAncMvPred.iMvp[1] == sWelsMvPred.iMvp[1]));
     EXPECT_EQ (bOK, true);
   }
@@ -419,7 +419,7 @@
   if (pDqLayer->pSliceIdc == NULL)
     return 1;
 
-  pDqLayer->pMbType = (int16_t*) WelsMallocz (pDqLayer->iMbWidth * pDqLayer->iMbHeight * sizeof (int16_t),
+  pDqLayer->pMbType = (uint32_t*) WelsMallocz (pDqLayer->iMbWidth * pDqLayer->iMbHeight * sizeof (uint32_t),
                       "pDqLayer->pMbType");
   if (pDqLayer->pMbType == NULL)
     return 1;