shithub: openh264

ref: aeb5ab4b9981cb346d075755f2d1eee440979fd8
dir: /codec/encoder/core/src/encoder_ext.cpp/

View raw version
/*!
 * \copy
 *     Copyright (c)  2009-2013, Cisco Systems
 *     All rights reserved.
 *
 *     Redistribution and use in source and binary forms, with or without
 *     modification, are permitted provided that the following conditions
 *     are met:
 *
 *        * Redistributions of source code must retain the above copyright
 *          notice, this list of conditions and the following disclaimer.
 *
 *        * Redistributions in binary form must reproduce the above copyright
 *          notice, this list of conditions and the following disclaimer in
 *          the documentation and/or other materials provided with the
 *          distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 *     POSSIBILITY OF SUCH DAMAGE.
 *
 *
 * \file    encoder_ext.c
 *
 * \brief   core encoder for SVC
 *
 * \date    7/24/2009 Created
 *
 *************************************************************************************
 */

#include "encoder.h"
#include "cpu.h"
#include "utils.h"
#include "svc_enc_golomb.h"
#include "au_set.h"
#include "picture_handle.h"
#include "svc_base_layer_md.h"
#include "svc_encode_slice.h"
#include "svc_mode_decision.h"
#include "decode_mb_aux.h"
#include "deblocking.h"
#include "ref_list_mgr_svc.h"
#include "ls_defines.h"
#include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms
#include "slice_multi_threading.h"
#include "measure_time.h"
#include "svc_set_mb_syn.h"

namespace WelsEnc {


int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx,
                                 SFrameBSInfo* pFrameBsInfo,
                                 SLayerBSInfo* pLayerBsInfo,
                                 int32_t* pNalIdxInLayer,
                                 int32_t* pLayerSize,
                                 int32_t iFirstMbInPartition,   // first mb inclusive in partition
                                 int32_t iEndMbInPartition,     // end mb exclusive in partition
                                 int32_t iStartSliceIdx
                                );


int32_t WelsBitRateVerification (SLogContext* pLogCtx, SSpatialLayerConfig* pLayerParam, int32_t iLayerId) {
  if ((pLayerParam->iSpatialBitrate <= 0)
      || (static_cast<float> (pLayerParam->iSpatialBitrate) < pLayerParam->fFrameRate)) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "Invalid bitrate settings in layer %d, bitrate= %d at FrameRate(%f)", iLayerId,
             pLayerParam->iSpatialBitrate, pLayerParam->fFrameRate);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }

  // deal with LEVEL_MAX_BR and MAX_BR setting
  const int32_t iLevelMaxBitrate = (pLayerParam->uiLevelIdc != LEVEL_UNKNOWN) ? (g_ksLevelLimits[pLayerParam->uiLevelIdc -
                                   1].uiMaxBR * CpbBrNalFactor) : UNSPECIFIED_BIT_RATE;
  const int32_t iLevel52MaxBitrate = g_ksLevelLimits[LEVEL_NUMBER - 1].uiMaxBR * CpbBrNalFactor;
  if (UNSPECIFIED_BIT_RATE != iLevelMaxBitrate) {
    if ((pLayerParam->iMaxSpatialBitrate == UNSPECIFIED_BIT_RATE)
        || (pLayerParam->iMaxSpatialBitrate > iLevel52MaxBitrate)) {
      pLayerParam->iMaxSpatialBitrate = iLevelMaxBitrate;
      WelsLog (pLogCtx, WELS_LOG_INFO,
               "Current MaxSpatialBitrate is invalid (UNSPECIFIED_BIT_RATE or larger than LEVEL5_2) but level setting is valid, set iMaxSpatialBitrate to %d from level (%d)",
               pLayerParam->iMaxSpatialBitrate, pLayerParam->uiLevelIdc);
    } else if (pLayerParam->iMaxSpatialBitrate > iLevelMaxBitrate) {
      ELevelIdc iCurLevel = pLayerParam->uiLevelIdc;
      WelsAdjustLevel (pLayerParam);
      WelsLog (pLogCtx, WELS_LOG_INFO,
               "LevelIdc is changed from (%d) to (%d) according to the iMaxSpatialBitrate(%d)",
               iCurLevel, pLayerParam->uiLevelIdc, pLayerParam->iMaxSpatialBitrate);
    }
  } else if ((pLayerParam->iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE)
             && (pLayerParam->iMaxSpatialBitrate > iLevel52MaxBitrate)) {
    // no level limitation, just need to check if iMaxSpatialBitrate is too big from reasonable
    WelsLog (pLogCtx, WELS_LOG_WARNING,
             "No LevelIdc setting and iMaxSpatialBitrate (%d) is considered too big to be valid, changed to UNSPECIFIED_BIT_RATE",
             pLayerParam->iMaxSpatialBitrate);
    pLayerParam->iMaxSpatialBitrate = UNSPECIFIED_BIT_RATE;
  }

  // deal with iSpatialBitrate and iMaxSpatialBitrate setting
  if (pLayerParam->iMaxSpatialBitrate != UNSPECIFIED_BIT_RATE) {
    if (pLayerParam->iMaxSpatialBitrate == pLayerParam->iSpatialBitrate) {
      WelsLog (pLogCtx, WELS_LOG_INFO,
               "Setting MaxSpatialBitrate (%d) the same at SpatialBitrate (%d) will make the actual bit rate lower than SpatialBitrate",
               pLayerParam->iMaxSpatialBitrate, pLayerParam->iSpatialBitrate);
    } else if (pLayerParam->iMaxSpatialBitrate < pLayerParam->iSpatialBitrate) {
      WelsLog (pLogCtx, WELS_LOG_ERROR,
               "MaxSpatialBitrate (%d) should be larger than SpatialBitrate (%d), considering it as error setting",
               pLayerParam->iMaxSpatialBitrate, pLayerParam->iSpatialBitrate);
      return ENC_RETURN_UNSUPPORTED_PARA;
    }
  }
  return ENC_RETURN_SUCCESS;
}

void CheckProfileSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iLayer, EProfileIdc uiProfileIdc) {
  SSpatialLayerConfig* pLayerInfo = &pParam->sSpatialLayers[iLayer];
  if (PRO_UNKNOWN == uiProfileIdc) {
    pLayerInfo->uiProfileIdc = (((iLayer == SPATIAL_LAYER_0)
                                 || pParam->bSimulcastAVC) ? PRO_BASELINE : PRO_SCALABLE_BASELINE);
  } else {
    pLayerInfo->uiProfileIdc = uiProfileIdc;
    if ((iLayer == SPATIAL_LAYER_0) && (uiProfileIdc != PRO_BASELINE)) {
      WelsLog (pLogCtx, WELS_LOG_WARNING, "doesn't support profile(%d), change to baseline profile",
               uiProfileIdc);
      pLayerInfo->uiProfileIdc = PRO_BASELINE;
    }
    if (iLayer > SPATIAL_LAYER_0) {
      if (pParam->bSimulcastAVC && (uiProfileIdc != PRO_BASELINE)) {
        pLayerInfo->uiProfileIdc = PRO_BASELINE;
        WelsLog (pLogCtx, WELS_LOG_WARNING, "doesn't support profile(%d) with bSimulcastAVC, change to baseline profile",
                 uiProfileIdc);
      }
      if ((uiProfileIdc != PRO_BASELINE) || (uiProfileIdc != PRO_SCALABLE_BASELINE)) {
        pLayerInfo->uiProfileIdc = PRO_BASELINE;
        WelsLog (pLogCtx, WELS_LOG_WARNING, "doesn't support profile(%d), change to baseline profile",
                 uiProfileIdc);
      }
    }
  }
}
void CheckLevelSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iLayer, ELevelIdc uiLevelIdc) {
  SSpatialLayerConfig* pLayerInfo = &pParam->sSpatialLayers[iLayer];
  pLayerInfo->uiLevelIdc = uiLevelIdc;
  if (uiLevelIdc > LEVEL_5_2) {
    WelsLog (pLogCtx, WELS_LOG_INFO, "change unexpected levelidc(%d) setting to LEVEL_UNKNOWN", pLayerInfo->uiLevelIdc);
    pLayerInfo->uiLevelIdc = LEVEL_UNKNOWN;
  }
}
void CheckReferenceNumSetting (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iNumRef) {
  int32_t iRefUpperBound = (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME) ?
                           MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA : MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN;
  pParam->iNumRefFrame = iNumRef;
  if ((iNumRef < MIN_REF_PIC_COUNT) || (iNumRef > iRefUpperBound)) {
    pParam->iNumRefFrame = AUTO_REF_PIC_COUNT;
    WelsLog (pLogCtx, WELS_LOG_WARNING,
             "doesn't support the number of reference frame(%d) change to auto select mode", iNumRef);
  }
}
/*!
 * \brief   validate checking in parameter configuration
 * \pParam  pParam      SWelsSvcCodingParam*
 * \return  successful - 0; otherwise none 0 for failed
 */
int32_t ParamValidation (SLogContext* pLogCtx, SWelsSvcCodingParam* pCfg) {
  const float fEpsn = 0.000001f;
  int32_t i = 0;

  assert (pCfg != NULL);

  if ((pCfg->iUsageType != CAMERA_VIDEO_REAL_TIME) && (pCfg->iUsageType != SCREEN_CONTENT_REAL_TIME)) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidation(),Invalid usage type = %d", pCfg->iUsageType);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }
  if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) {
    if (pCfg->iSpatialLayerNum > 1) {
      WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidation(),Invalid the number of Spatial layer(%d)for screen content",
               pCfg->iSpatialLayerNum);
      return ENC_RETURN_UNSUPPORTED_PARA;
    }
    if (pCfg->bEnableAdaptiveQuant) {
      WelsLog (pLogCtx, WELS_LOG_WARNING,
               "ParamValidation(), AdaptiveQuant(%d) is not supported yet for screen content, auto turned off",
               pCfg->bEnableAdaptiveQuant);
      pCfg->bEnableAdaptiveQuant = false;
    }
    if (pCfg->bEnableSceneChangeDetect == false) {
      pCfg->bEnableSceneChangeDetect = true;
      WelsLog (pLogCtx, WELS_LOG_WARNING,
               "ParamValidation(), screen change detection should be turned on, change bEnableSceneChangeDetect as true");
    }

  }
  if (pCfg->iSpatialLayerNum > 1) {
    for (i = pCfg->iSpatialLayerNum - 1; i > 0; i--) {
      SSpatialLayerConfig* fDlpUp = &pCfg->sSpatialLayers[i];
      SSpatialLayerConfig* fDlp = &pCfg->sSpatialLayers[i - 1];
      if ((fDlp->iVideoWidth > fDlpUp->iVideoWidth) || (fDlp->iVideoHeight > fDlpUp->iVideoHeight)) {
        WelsLog (pLogCtx, WELS_LOG_ERROR,
                 "ParamValidation,Invalid resolution layer(%d) resolution(%d x %d) should be less than the upper spatial layer resolution(%d x %d) ",
                 i, fDlp->iVideoWidth, fDlp->iVideoHeight, fDlpUp->iVideoWidth, fDlpUp->iVideoHeight);
        return ENC_RETURN_UNSUPPORTED_PARA;
      }
    }
  }

  if (!CheckInRangeCloseOpen (pCfg->iLoopFilterDisableIdc, DEBLOCKING_IDC_0, DEBLOCKING_IDC_2 + 1) ||
      !CheckInRangeCloseOpen (pCfg->iLoopFilterAlphaC0Offset, DEBLOCKING_OFFSET_MINUS, DEBLOCKING_OFFSET + 1) ||
      !CheckInRangeCloseOpen (pCfg->iLoopFilterBetaOffset, DEBLOCKING_OFFSET_MINUS, DEBLOCKING_OFFSET + 1)) {
    WelsLog (pLogCtx, WELS_LOG_ERROR,
             "ParamValidation, Invalid iLoopFilterDisableIdc(%d) or iLoopFilterAlphaC0Offset(%d) or iLoopFilterBetaOffset(%d)!",
             pCfg->iLoopFilterDisableIdc, pCfg->iLoopFilterAlphaC0Offset, pCfg->iLoopFilterBetaOffset);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }

  for (i = 0; i < pCfg->iSpatialLayerNum; ++ i) {
    SSpatialLayerInternal* fDlp = &pCfg->sDependencyLayers[i];
    SSpatialLayerConfig* pConfig = &pCfg->sSpatialLayers[i];
    if (fDlp->fOutputFrameRate > fDlp->fInputFrameRate || (fDlp->fInputFrameRate >= -fEpsn
        && fDlp->fInputFrameRate <= fEpsn)
        || (fDlp->fOutputFrameRate >= -fEpsn && fDlp->fOutputFrameRate <= fEpsn)) {
      WelsLog (pLogCtx, WELS_LOG_ERROR,
               "Invalid settings in input frame rate(%.6f) or output frame rate(%.6f) of layer #%d config file..",
               fDlp->fInputFrameRate, fDlp->fOutputFrameRate, i);
      return ENC_RETURN_INVALIDINPUT;
    }
    if (UINT_MAX == GetLogFactor (fDlp->fOutputFrameRate, fDlp->fInputFrameRate)) {
      WelsLog (pLogCtx, WELS_LOG_WARNING,
               "AUTO CORRECT: Invalid settings in input frame rate(%.6f) and output frame rate(%.6f) of layer #%d config file: iResult of output frame rate divided by input frame rate should be power of 2(i.e,in/pOut=2^n). \n Auto correcting Output Framerate to Input Framerate %f!\n",
               fDlp->fInputFrameRate, fDlp->fOutputFrameRate, i, fDlp->fInputFrameRate);
      fDlp->fOutputFrameRate = fDlp->fInputFrameRate;
      pConfig->fFrameRate = fDlp->fOutputFrameRate;
    }
  }

  if ((pCfg->iRCMode != RC_OFF_MODE) && (pCfg->iRCMode != RC_QUALITY_MODE) && (pCfg->iRCMode != RC_BUFFERBASED_MODE)
      && (pCfg->iRCMode != RC_BITRATE_MODE) && (pCfg->iRCMode != RC_TIMESTAMP_MODE)) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidation(),Invalid iRCMode = %d", pCfg->iRCMode);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }
  //bitrate setting validation
  if (pCfg->iRCMode != RC_OFF_MODE) {
    int32_t  iTotalBitrate = 0;
    if (pCfg->iTargetBitrate <= 0) {
      WelsLog (pLogCtx, WELS_LOG_ERROR, "Invalid bitrate settings in total configure, bitrate= %d", pCfg->iTargetBitrate);
      return ENC_RETURN_INVALIDINPUT;
    }
    for (i = 0; i < pCfg->iSpatialLayerNum; ++ i) {
      SSpatialLayerConfig* pSpatialLayer = &pCfg->sSpatialLayers[i];
      iTotalBitrate += pSpatialLayer->iSpatialBitrate;

      if (WelsBitRateVerification (pLogCtx, pSpatialLayer, i) != ENC_RETURN_SUCCESS)
        return ENC_RETURN_INVALIDINPUT;
    }
    if (iTotalBitrate > pCfg->iTargetBitrate) {
      WelsLog (pLogCtx, WELS_LOG_ERROR,
               "Invalid settings in bitrate. the sum of each layer bitrate(%d) is larger than total bitrate setting(%d)",
               iTotalBitrate, pCfg->iTargetBitrate);
      return ENC_RETURN_INVALIDINPUT;
    }
    if ((pCfg->iRCMode == RC_QUALITY_MODE) || (pCfg->iRCMode == RC_BITRATE_MODE) || (pCfg->iRCMode == RC_TIMESTAMP_MODE))
      if (!pCfg->bEnableFrameSkip)
        WelsLog (pLogCtx, WELS_LOG_WARNING,
                 "bEnableFrameSkip = %d,bitrate can't be controlled for RC_QUALITY_MODE,RC_BITRATE_MODE and RC_TIMESTAMP_MODE without enabling skip frame.",
                 pCfg->bEnableFrameSkip);

    if (pCfg->iRCMode == RC_QUALITY_MODE) {
      pCfg->iMinQp = GOM_MIN_QP_MODE;
      pCfg->iMaxQp = GOM_MAX_QP_MODE;
    } else if (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME) {
      pCfg->iMinQp = MIN_SCREEN_QP;
      pCfg->iMaxQp = MAX_SCREEN_QP;
    } else {
      pCfg->iMinQp = WELS_CLIP3 (pCfg->iMinQp , GOM_MIN_QP_MODE, 51);
      pCfg->iMaxQp = WELS_CLIP3 (pCfg->iMaxQp , 0, 51);
      if (pCfg->iMaxQp <= pCfg->iMinQp)
        pCfg->iMaxQp = 51;

    }

  }
  // ref-frames validation
  if (((pCfg->iUsageType == CAMERA_VIDEO_REAL_TIME) || (pCfg->iUsageType == SCREEN_CONTENT_REAL_TIME))
      ? WelsCheckRefFrameLimitationNumRefFirst (pLogCtx, pCfg)
      : WelsCheckRefFrameLimitationLevelIdcFirst (pLogCtx, pCfg)) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsCheckRefFrameLimitation failed");
    return ENC_RETURN_INVALIDINPUT;
  }
  return ENC_RETURN_SUCCESS;
}


int32_t ParamValidationExt (SLogContext* pLogCtx, SWelsSvcCodingParam* pCodingParam) {
  int8_t i = 0;
  int32_t iIdx = 0;

  assert (pCodingParam != NULL);
  if (NULL == pCodingParam)
    return ENC_RETURN_INVALIDINPUT;

  if ((pCodingParam->iUsageType != CAMERA_VIDEO_REAL_TIME) && (pCodingParam->iUsageType != SCREEN_CONTENT_REAL_TIME)) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(),Invalid usage type = %d", pCodingParam->iUsageType);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }
  if ((pCodingParam->iUsageType == SCREEN_CONTENT_REAL_TIME) && (!pCodingParam->bIsLosslessLink
      && pCodingParam->bEnableLongTermReference)) {
    WelsLog (pLogCtx, WELS_LOG_WARNING,
             "ParamValidationExt(), setting lossy link for LTR under screen, which is not supported yet! Auto disabled LTR!");
    pCodingParam->bEnableLongTermReference = false;
  }
  if (pCodingParam->iSpatialLayerNum < 1 || pCodingParam->iSpatialLayerNum > MAX_DEPENDENCY_LAYER) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), monitor invalid pCodingParam->iSpatialLayerNum: %d!",
             pCodingParam->iSpatialLayerNum);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }

  if (pCodingParam->iTemporalLayerNum < 1 || pCodingParam->iTemporalLayerNum > MAX_TEMPORAL_LEVEL) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), monitor invalid pCodingParam->iTemporalLayerNum: %d!",
             pCodingParam->iTemporalLayerNum);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }

  if (pCodingParam->uiGopSize < 1 || pCodingParam->uiGopSize > MAX_GOP_SIZE) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), monitor invalid pCodingParam->uiGopSize: %d!",
             pCodingParam->uiGopSize);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }


  if (pCodingParam->uiIntraPeriod && pCodingParam->uiIntraPeriod < pCodingParam->uiGopSize) {
    WelsLog (pLogCtx, WELS_LOG_ERROR,
             "ParamValidationExt(), uiIntraPeriod(%d) should be not less than that of uiGopSize(%d) or -1 specified!",
             pCodingParam->uiIntraPeriod, pCodingParam->uiGopSize);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }

  if (pCodingParam->uiIntraPeriod && (pCodingParam->uiIntraPeriod & (pCodingParam->uiGopSize - 1)) != 0) {
    WelsLog (pLogCtx, WELS_LOG_ERROR,
             "ParamValidationExt(), uiIntraPeriod(%d) should be multiple of uiGopSize(%d) or -1 specified!",
             pCodingParam->uiIntraPeriod, pCodingParam->uiGopSize);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }

  //about iMultipleThreadIdc, bDeblockingParallelFlag, iLoopFilterDisableIdc, & uiSliceMode
  // (1) Single Thread
  //    if (THREAD==1)//single thread
  //            no parallel_deblocking: bDeblockingParallelFlag = 0;
  // (2) Multi Thread: see uiSliceMode decision
  if (pCodingParam->iMultipleThreadIdc == 1) {
    //now is single thread. no parallel deblocking, set flag=0
    pCodingParam->bDeblockingParallelFlag = false;
  } else {
    pCodingParam->bDeblockingParallelFlag = true;
  }

  // eSpsPpsIdStrategy checkings
  if (pCodingParam->iSpatialLayerNum > 1 && (!pCodingParam->bSimulcastAVC)
      && (SPS_LISTING & pCodingParam->eSpsPpsIdStrategy)) {
    WelsLog (pLogCtx, WELS_LOG_WARNING,
             "ParamValidationExt(), eSpsPpsIdStrategy setting (%d) with multiple svc SpatialLayers (%d) not supported! eSpsPpsIdStrategy adjusted to CONSTANT_ID",
             pCodingParam->eSpsPpsIdStrategy, pCodingParam->iSpatialLayerNum);
    pCodingParam->eSpsPpsIdStrategy = CONSTANT_ID;
  }
  if (pCodingParam->iUsageType == SCREEN_CONTENT_REAL_TIME && (SPS_LISTING & pCodingParam->eSpsPpsIdStrategy)) {
    WelsLog (pLogCtx, WELS_LOG_WARNING,
             "ParamValidationExt(), eSpsPpsIdStrategy setting (%d) with iUsageType (%d) not supported! eSpsPpsIdStrategy adjusted to CONSTANT_ID",
             pCodingParam->eSpsPpsIdStrategy, pCodingParam->iUsageType);
    pCodingParam->eSpsPpsIdStrategy = CONSTANT_ID;
  }

  if (pCodingParam->bSimulcastAVC && (SPS_LISTING & pCodingParam->eSpsPpsIdStrategy)) {
    WelsLog (pLogCtx, WELS_LOG_INFO,
             "ParamValidationExt(), eSpsPpsIdStrategy(%d) under bSimulcastAVC(%d) not supported yet, adjusted to INCREASING_ID",
             pCodingParam->eSpsPpsIdStrategy, pCodingParam->bSimulcastAVC);
    pCodingParam->eSpsPpsIdStrategy = INCREASING_ID;
  }

  if (pCodingParam->bSimulcastAVC && pCodingParam->bPrefixNalAddingCtrl) {
    WelsLog (pLogCtx, WELS_LOG_INFO,
             "ParamValidationExt(), bSimulcastAVC(%d) is not compatible with bPrefixNalAddingCtrl(%d) true, adjusted bPrefixNalAddingCtrl to false",
             pCodingParam->eSpsPpsIdStrategy, pCodingParam->bSimulcastAVC);
    pCodingParam->bPrefixNalAddingCtrl = false;
  }

  for (i = 0; i < pCodingParam->iSpatialLayerNum; ++ i) {
    SSpatialLayerConfig* pSpatialLayer = &pCodingParam->sSpatialLayers[i];
    const int32_t kiPicWidth = pSpatialLayer->iVideoWidth;
    const int32_t kiPicHeight = pSpatialLayer->iVideoHeight;
    uint32_t iMbWidth           = 0;
    uint32_t iMbHeight          = 0;
    int32_t iMbNumInFrame       = 0;
    uint32_t iMaxSliceNum       = MAX_SLICES_NUM;
    if ((kiPicWidth <= 0) || (kiPicHeight <= 0) || (kiPicWidth > MAX_WIDTH) || (kiPicHeight > MAX_HEIGHT)) {
      WelsLog (pLogCtx, WELS_LOG_ERROR,
               "ParamValidationExt(),width(1-%d),height(1-%d)invalid %d x %d in dependency layer settings!", MAX_WIDTH, MAX_HEIGHT,
               kiPicWidth, kiPicHeight);
      return ENC_RETURN_UNSUPPORTED_PARA;
    }
    if ((kiPicWidth & 0x0F) != 0 || (kiPicHeight & 0x0F) != 0) {
      WelsLog (pLogCtx, WELS_LOG_ERROR,
               "ParamValidationExt(), in layer #%d iWidth x iHeight(%d x %d) both should be multiple of 16, can not support with arbitrary size currently!",
               i, kiPicWidth, kiPicHeight);
      return ENC_RETURN_UNSUPPORTED_PARA;
    }

    if (pSpatialLayer->sSliceArgument.uiSliceMode >= SM_RESERVED) {
      WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMode (%d) settings!",
               pSpatialLayer->sSliceArgument.uiSliceMode);
      return ENC_RETURN_UNSUPPORTED_PARA;
    }
    if ((pCodingParam->uiMaxNalSize != 0) && (pSpatialLayer->sSliceArgument.uiSliceMode != SM_SIZELIMITED_SLICE)) {
      WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMode (%d) settings!,MaxNalSize = %d",
               pSpatialLayer->sSliceArgument.uiSliceMode, pCodingParam->uiMaxNalSize);
      return ENC_RETURN_UNSUPPORTED_PARA;
    }
    CheckProfileSetting (pLogCtx, pCodingParam, i, pSpatialLayer->uiProfileIdc);
    CheckLevelSetting (pLogCtx, pCodingParam, i, pSpatialLayer->uiLevelIdc);
    //check pSlice settings under multi-pSlice
    if (kiPicWidth <= 16 && kiPicHeight <= 16) {
      //only have one MB, set to single_slice
      pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
    }
    switch (pSpatialLayer->sSliceArgument.uiSliceMode) {
    case SM_SINGLE_SLICE:
      pSpatialLayer->sSliceArgument.uiSliceNum = 1;
      pSpatialLayer->sSliceArgument.uiSliceSizeConstraint = 0;
      for (iIdx = 0; iIdx < MAX_SLICES_NUM; iIdx++) {
        pSpatialLayer->sSliceArgument.uiSliceMbNum[iIdx] = 0;
      }
      break;
    case SM_FIXEDSLCNUM_SLICE: {
      pSpatialLayer->sSliceArgument.uiSliceSizeConstraint = 0;

      iMbWidth  = (kiPicWidth + 15) >> 4;
      iMbHeight = (kiPicHeight + 15) >> 4;
      iMbNumInFrame = iMbWidth * iMbHeight;
      iMaxSliceNum = MAX_SLICES_NUM;
      if (pSpatialLayer->sSliceArgument.uiSliceNum == 0) {
        int32_t uiCpuCores = 0;
        WelsCPUFeatureDetect (&uiCpuCores); // detect cpu capacity features
        pSpatialLayer->sSliceArgument.uiSliceNum = uiCpuCores;

        if (uiCpuCores <= 1) {
          WelsLog (pLogCtx, WELS_LOG_INFO, "ParamValidationExt(), uiCpuCores = 1, switched to SM_SINGLE_SLICE");
          pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
          pSpatialLayer->sSliceArgument.uiSliceNum = 1;
          pSpatialLayer->sSliceArgument.uiSliceSizeConstraint = 0;
          for (iIdx = 0; iIdx < MAX_SLICES_NUM; iIdx++) {
            pSpatialLayer->sSliceArgument.uiSliceMbNum[iIdx] = 0;
          }
          break;
        }
      }
      if (pSpatialLayer->sSliceArgument.uiSliceNum > iMaxSliceNum) {
        WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceNum (%d) settings!",
                 pSpatialLayer->sSliceArgument.uiSliceNum);
        return ENC_RETURN_UNSUPPORTED_PARA;
      }
      if (pSpatialLayer->sSliceArgument.uiSliceNum == 1) {
        WelsLog (pLogCtx, WELS_LOG_DEBUG,
                 "ParamValidationExt(), uiSliceNum(%d) you set for SM_FIXEDSLCNUM_SLICE, now turn to SM_SINGLE_SLICE type!",
                 pSpatialLayer->sSliceArgument.uiSliceNum);
        pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
        break;
      }
      if (pCodingParam->iRCMode != RC_OFF_MODE) { // multiple slices verify with gom
        //check uiSliceNum and set uiSliceMbNum with current uiSliceNum
        if (!GomValidCheckSliceNum (iMbWidth, iMbHeight, &pSpatialLayer->sSliceArgument.uiSliceNum)) {
          WelsLog (pLogCtx, WELS_LOG_WARNING,
                   "ParamValidationExt(), unsupported setting with Resolution and uiSliceNum combination under RC on! So uiSliceNum is changed to %d!",
                   pSpatialLayer->sSliceArgument.uiSliceNum);
        }
        if (pSpatialLayer->sSliceArgument.uiSliceNum <= 1 ||
            !GomValidCheckSliceMbNum (iMbWidth, iMbHeight, &pSpatialLayer->sSliceArgument)) {
          WelsLog (pLogCtx, WELS_LOG_ERROR,
                   "ParamValidationExt(), unsupported setting with Resolution and uiSliceNum (%d) combination  under RC on! Consider setting single slice with this resolution!",
                   pSpatialLayer->sSliceArgument.uiSliceNum);
          return ENC_RETURN_UNSUPPORTED_PARA;
        }
        assert (pSpatialLayer->sSliceArgument.uiSliceNum > 1);
      } else if (!CheckFixedSliceNumMultiSliceSetting (iMbNumInFrame,
                 &pSpatialLayer->sSliceArgument)) { // verify interleave mode settings
        //check uiSliceMbNum with current uiSliceNum
        WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!",
                 pSpatialLayer->sSliceArgument.uiSliceMbNum[0]);
        return ENC_RETURN_UNSUPPORTED_PARA;
      }
      // considering the coding efficient and performance, iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting
      if (iMbNumInFrame <= MIN_NUM_MB_PER_SLICE) {
        pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
        pSpatialLayer->sSliceArgument.uiSliceNum = 1;
        break;
      }
    }
    break;
    case SM_RASTER_SLICE: {
      pSpatialLayer->sSliceArgument.uiSliceSizeConstraint = 0;

      iMbWidth  = (kiPicWidth + 15) >> 4;
      iMbHeight = (kiPicHeight + 15) >> 4;
      iMbNumInFrame = iMbWidth * iMbHeight;
      iMaxSliceNum = MAX_SLICES_NUM;
      if (pSpatialLayer->sSliceArgument.uiSliceMbNum[0] == 0) {
        if (iMbHeight > iMaxSliceNum) {
          WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceNum (%d) settings more than MAX(%d)!",
                   iMbHeight, MAX_SLICES_NUM);
          return ENC_RETURN_UNSUPPORTED_PARA;
        }
        pSpatialLayer->sSliceArgument.uiSliceNum = iMbHeight;
        for (uint32_t j=0; j<iMbHeight; j++) {
          pSpatialLayer->sSliceArgument.uiSliceMbNum[j] = iMbWidth;
        }
        if (!CheckRowMbMultiSliceSetting (iMbWidth,
                                          &pSpatialLayer->sSliceArgument)) { // verify interleave mode settings
          WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!",
                   pSpatialLayer->sSliceArgument.uiSliceMbNum[0]);
          return ENC_RETURN_UNSUPPORTED_PARA;
        }
        break;
      }

      if (!CheckRasterMultiSliceSetting (iMbNumInFrame,
                                         &pSpatialLayer->sSliceArgument)) { // verify interleave mode settings
        WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!",
                 pSpatialLayer->sSliceArgument.uiSliceMbNum[0]);
        return ENC_RETURN_UNSUPPORTED_PARA;
      }
      if (pSpatialLayer->sSliceArgument.uiSliceNum <= 0
          || pSpatialLayer->sSliceArgument.uiSliceNum > iMaxSliceNum) { // verify interleave mode settings
        WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceNum (%d) in SM_RASTER_SLICE settings!",
                 pSpatialLayer->sSliceArgument.uiSliceNum);
        return ENC_RETURN_UNSUPPORTED_PARA;
      }
      if (pSpatialLayer->sSliceArgument.uiSliceNum == 1) {
        WelsLog (pLogCtx, WELS_LOG_WARNING,
                 "ParamValidationExt(), pSlice setting for SM_RASTER_SLICE now turn to SM_SINGLE_SLICE!");
        pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
        break;
      }
      if ((pCodingParam->iRCMode != RC_OFF_MODE) && pSpatialLayer->sSliceArgument.uiSliceNum > 1) {
        WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), WARNING: GOM based RC do not support SM_RASTER_SLICE!");
      }
      // considering the coding efficient and performance, iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting
      if (iMbNumInFrame <= MIN_NUM_MB_PER_SLICE) {
        pSpatialLayer->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
        pSpatialLayer->sSliceArgument.uiSliceNum = 1;
        break;
      }
    }
    break;
    case SM_SIZELIMITED_SLICE: {
      iMbWidth  = (kiPicWidth + 15) >> 4;
      iMbHeight = (kiPicHeight + 15) >> 4;
      if (pSpatialLayer->sSliceArgument.uiSliceSizeConstraint <= 0) {
        WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid iSliceSize (%d) settings!",
                 pSpatialLayer->sSliceArgument.uiSliceSizeConstraint);
        return ENC_RETURN_UNSUPPORTED_PARA;
      }

      if (pCodingParam->uiMaxNalSize < (NAL_HEADER_ADD_0X30BYTES + MAX_MACROBLOCK_SIZE_IN_BYTE)) {
        WelsLog (pLogCtx, WELS_LOG_ERROR,
                 "ParamValidationExt(), invalid uiMaxNalSize (%d) settings! should be larger than (NAL_HEADER_ADD_0X30BYTES + MAX_MACROBLOCK_SIZE_IN_BYTE)(%d)",
                 pCodingParam->uiMaxNalSize, (NAL_HEADER_ADD_0X30BYTES + MAX_MACROBLOCK_SIZE_IN_BYTE));
        return ENC_RETURN_UNSUPPORTED_PARA;
      }

      if (pSpatialLayer->sSliceArgument.uiSliceSizeConstraint > (pCodingParam->uiMaxNalSize -
          NAL_HEADER_ADD_0X30BYTES)) {
        WelsLog (pLogCtx, WELS_LOG_WARNING,
                 "ParamValidationExt(), slice mode = SM_SIZELIMITED_SLICE, uiSliceSizeConstraint = %d ,uiMaxNalsize = %d, will take uiMaxNalsize!",
                 pSpatialLayer->sSliceArgument.uiSliceSizeConstraint, pCodingParam->uiMaxNalSize);
        pSpatialLayer->sSliceArgument.uiSliceSizeConstraint =  pCodingParam->uiMaxNalSize - NAL_HEADER_ADD_0X30BYTES;
      }

    }
    break;
    default: {
      WelsLog (pLogCtx, WELS_LOG_ERROR, "ParamValidationExt(), invalid uiSliceMode (%d) settings!",
               pCodingParam->sSpatialLayers[0].sSliceArgument.uiSliceMode);
      return ENC_RETURN_UNSUPPORTED_PARA;

    }
    break;
    }
  }

  return ParamValidation (pLogCtx, pCodingParam);
}


void WelsEncoderApplyFrameRate (SWelsSvcCodingParam* pParam) {
  SSpatialLayerInternal* pLayerParamInternal;
  SSpatialLayerConfig* pLayerParam;
  const float kfEpsn = 0.000001f;
  const int32_t kiNumLayer = pParam->iSpatialLayerNum;
  int32_t i;
  const float kfMaxFrameRate = pParam->fMaxFrameRate;
  float fRatio;
  float fTargetOutputFrameRate;

  //set input frame rate to each layer
  for (i = 0; i < kiNumLayer; i++) {
    pLayerParamInternal = & (pParam->sDependencyLayers[i]);
    pLayerParam = & (pParam->sSpatialLayers[i]);
    fRatio = pLayerParamInternal->fOutputFrameRate / pLayerParamInternal->fInputFrameRate;
    if ((kfMaxFrameRate - pLayerParamInternal->fInputFrameRate) > kfEpsn
        || (kfMaxFrameRate - pLayerParamInternal->fInputFrameRate) < -kfEpsn) {
      pLayerParamInternal->fInputFrameRate = kfMaxFrameRate;
      fTargetOutputFrameRate = kfMaxFrameRate * fRatio;
      pLayerParamInternal->fOutputFrameRate = (fTargetOutputFrameRate >= 6) ? fTargetOutputFrameRate :
                                              (pLayerParamInternal->fInputFrameRate);
      pLayerParam->fFrameRate = pLayerParamInternal->fOutputFrameRate;
      //TODO:{Sijia} from design, there is no sense to have temporal layer when under 6fps even with such setting?
    }
  }
}

int32_t WelsEncoderApplyBitRate (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int iLayer) {
  //TODO (Sijia):  this is a temporary solution which keep the ratio between layers
  //but it is also possible to fulfill the bitrate of lower layer first

  SSpatialLayerConfig* pLayerParam;
  const int32_t iNumLayers = pParam->iSpatialLayerNum;
  int32_t i, iOrigTotalBitrate = 0;
  if (iLayer == SPATIAL_LAYER_ALL) {
    //read old BR
    for (i = 0; i < iNumLayers; i++) {
      iOrigTotalBitrate += pParam->sSpatialLayers[i].iSpatialBitrate;
    }
    //write new BR
    float fRatio = 0.0;
    for (i = 0; i < iNumLayers; i++) {
      pLayerParam = & (pParam->sSpatialLayers[i]);
      fRatio = pLayerParam->iSpatialBitrate / (static_cast<float> (iOrigTotalBitrate));
      pLayerParam->iSpatialBitrate = static_cast<int32_t> (pParam->iTargetBitrate * fRatio);

      if (WelsBitRateVerification (pLogCtx, pLayerParam, i) != ENC_RETURN_SUCCESS)
        return ENC_RETURN_UNSUPPORTED_PARA;
    }
  } else {
    return WelsBitRateVerification (pLogCtx, & (pParam->sSpatialLayers[iLayer]), iLayer);
  }
  return ENC_RETURN_SUCCESS;
}
int32_t WelsEncoderApplyBitVaryRang (SLogContext* pLogCtx, SWelsSvcCodingParam* pParam, int32_t iRang) {
  SSpatialLayerConfig* pLayerParam;
  const int32_t iNumLayers = pParam->iSpatialLayerNum;
  for (int32_t i = 0; i < iNumLayers; i++) {
    pLayerParam = & (pParam->sSpatialLayers[i]);
    pLayerParam->iMaxSpatialBitrate = WELS_MIN ((int) (pLayerParam->iSpatialBitrate * (1 + iRang / 100.0)),
                                      pLayerParam->iMaxSpatialBitrate);
    if (WelsBitRateVerification (pLogCtx, pLayerParam, i) != ENC_RETURN_SUCCESS)
      return ENC_RETURN_UNSUPPORTED_PARA;
    WelsLog (pLogCtx, WELS_LOG_INFO,
             "WelsEncoderApplyBitVaryRang:UpdateMaxBitrate layerId= %d,iMaxSpatialBitrate = %d", i, pLayerParam->iMaxSpatialBitrate);
  }
  return ENC_RETURN_SUCCESS;
}

/*!
 * \brief   acquire count number of layers and NALs based on configurable paramters dependency
 * \pParam  pCtx            sWelsEncCtx*
 * \pParam  pParam          SWelsSvcCodingParam*
 * \pParam  pCountLayers    pointer of count number of layers indeed
 * \pParam  iCountNals      pointer of count number of nals indeed
 * \return  0 - successful; otherwise failed
 */
static inline int32_t AcquireLayersNals (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam, int32_t* pCountLayers,
    int32_t* pCountNals) {
  int32_t iCountNumLayers       = 0;
  int32_t iCountNumNals         = 0;
  int32_t iNumDependencyLayers  = 0;
  int32_t iDIndex               = 0;

  if (NULL == pParam || NULL == ppCtx || NULL == *ppCtx)
    return 1;

  iNumDependencyLayers = pParam->iSpatialLayerNum;

  do {
    SSpatialLayerConfig* pDLayer = &pParam->sSpatialLayers[iDIndex];
//    pDLayer->ptr_cfg = pParam;
    int32_t iOrgNumNals = iCountNumNals;

    //Note: Sep. 2010
    //Review this part and suggest no change, since the memory over-use
    //(1) counts little to the overall performance
    //(2) should not be critial even under mobile case
    if (SM_SIZELIMITED_SLICE == pDLayer->sSliceArgument.uiSliceMode) {
      iCountNumNals += MAX_SLICES_NUM;
      // plus prefix NALs
      if (iDIndex == 0)
        iCountNumNals += MAX_SLICES_NUM;
      // MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME ensured at svc_enc_slice_segment.h
      if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) {
        WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
                 "AcquireLayersNals(), num_of_slice(%d) > existing slice(%d) at (iDid= %d), max=%d",
                 iCountNumNals, iOrgNumNals, iDIndex, MAX_NAL_UNITS_IN_LAYER);
        return 1;
      }
    } else { /*if ( SM_SINGLE_SLICE != pDLayer->sSliceArgument.uiSliceMode )*/
      const int32_t kiNumOfSlice = GetInitialSliceNum ((pDLayer->iVideoWidth + 0x0f) >> 4,
                                   (pDLayer->iVideoHeight + 0x0f) >> 4,
                                   &pDLayer->sSliceArgument);

      // NEED check iCountNals value in case multiple slices is used
      iCountNumNals += kiNumOfSlice; // for pSlice VCL NALs
      // plus prefix NALs
      if (iDIndex == 0)
        iCountNumNals += kiNumOfSlice;
      assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER);
      if (kiNumOfSlice > MAX_SLICES_NUM) {
        WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
                 "AcquireLayersNals(), num_of_slice(%d) > MAX_SLICES_NUM(%d) per (iDid= %d, qid= %d) settings!",
                 kiNumOfSlice, MAX_SLICES_NUM, iDIndex, 0);
        return 1;
      }
    }

    if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) {
      WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
               "AcquireLayersNals(), num_of_nals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) per (iDid= %d, qid= %d) settings!",
               (iCountNumNals - iOrgNumNals), MAX_NAL_UNITS_IN_LAYER, iDIndex, 0);
      return 1;
    }

    iCountNumLayers ++;

    ++ iDIndex;
  } while (iDIndex < iNumDependencyLayers);

  // count parasets
  iCountNumNals += 1 + iNumDependencyLayers + (iCountNumLayers << 1) +
                   iCountNumLayers // plus iCountNumLayers for reserved application
                   + (*ppCtx)->GetNeededSpsNum()
                   + (*ppCtx)->GetNeededSubsetSpsNum()
                   + (*ppCtx)->GetNeededPpsNum();

  // to check number of layers / nals / slices dependencies, 12/8/2010
  if (iCountNumLayers > MAX_LAYER_NUM_OF_FRAME) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, "AcquireLayersNals(), iCountNumLayers(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
             iCountNumLayers, MAX_LAYER_NUM_OF_FRAME);
    return 1;
  }

  if (NULL != pCountLayers)
    *pCountLayers = iCountNumLayers;
  if (NULL != pCountNals)
    *pCountNals = iCountNumNals;
  return 0;
}

static  void  InitMbInfo (sWelsEncCtx* pEnc, SMB*   pList, SDqLayer* pLayer, const int32_t kiDlayerId,
                          const int32_t kiMaxMbNum) {
  int32_t  iMbWidth     = pLayer->iMbWidth;
  int32_t  iMbHeight    = pLayer->iMbHeight;
  int32_t  iIdx;
  int32_t  iMbNum       = iMbWidth * iMbHeight;
  SSliceCtx* pSliceCtx = pLayer->pSliceEncCtx;
  uint32_t uiNeighborAvail;
  const int32_t kiOffset = (kiDlayerId & 0x01) * kiMaxMbNum;
  SMVUnitXY (*pLayerMvUnitBlock4x4)[MB_BLOCK4x4_NUM] = (SMVUnitXY (*)[MB_BLOCK4x4_NUM]) (
        &pEnc->pMvUnitBlock4x4[MB_BLOCK4x4_NUM * kiOffset]);
  int8_t (*pLayerRefIndexBlock8x8)[MB_BLOCK8x8_NUM] = (int8_t (*)[MB_BLOCK8x8_NUM]) (
        &pEnc->pRefIndexBlock4x4[MB_BLOCK8x8_NUM * kiOffset]);

  for (iIdx = 0; iIdx < iMbNum; iIdx++) {
    bool     bLeft;
    bool     bTop;
    bool     bLeftTop;
    bool     bRightTop;
    int32_t  iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;
    uint16_t  uiSliceIdc; //[0..65535] > 36864 of LEVEL5.2

    pList[iIdx].iMbX = pEnc->pStrideTab->pMbIndexX[kiDlayerId][iIdx];
    pList[iIdx].iMbY = pEnc->pStrideTab->pMbIndexY[kiDlayerId][iIdx];
    pList[iIdx].iMbXY = iIdx;

    uiSliceIdc = WelsMbToSliceIdc (pSliceCtx, iIdx);
    iLeftXY = iIdx - 1;
    iTopXY = iIdx - iMbWidth;
    iLeftTopXY = iTopXY - 1;
    iRightTopXY = iTopXY + 1;

    bLeft = (pList[iIdx].iMbX > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iLeftXY));
    bTop = (pList[iIdx].iMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iTopXY));
    bLeftTop = (pList[iIdx].iMbX > 0) && (pList[iIdx].iMbY > 0) && (uiSliceIdc ==
               WelsMbToSliceIdc (pSliceCtx, iLeftTopXY));
    bRightTop = (pList[iIdx].iMbX < (iMbWidth - 1)) && (pList[iIdx].iMbY > 0) && (uiSliceIdc ==
                WelsMbToSliceIdc (pSliceCtx, iRightTopXY));

    uiNeighborAvail = 0;
    if (bLeft) {
      uiNeighborAvail |= LEFT_MB_POS;
    }
    if (bTop) {
      uiNeighborAvail |= TOP_MB_POS;
    }
    if (bLeftTop) {
      uiNeighborAvail |= TOPLEFT_MB_POS;
    }
    if (bRightTop) {
      uiNeighborAvail |= TOPRIGHT_MB_POS;
    }
    pList[iIdx].uiSliceIdc      = uiSliceIdc; // merge from svc_hd_opt_b for multiple slices coding
    pList[iIdx].uiNeighborAvail = uiNeighborAvail;
    uiNeighborAvail = 0;
    if (pList[iIdx].iMbX >= BASE_MV_MB_NMB)
      uiNeighborAvail |= LEFT_MB_POS;
    if (pList[iIdx].iMbX <= (iMbWidth - 1 - BASE_MV_MB_NMB))
      uiNeighborAvail |= RIGHT_MB_POS;
    if (pList[iIdx].iMbY >= BASE_MV_MB_NMB)
      uiNeighborAvail |= TOP_MB_POS;
    if (pList[iIdx].iMbY <= (iMbHeight - 1 - BASE_MV_MB_NMB))
      uiNeighborAvail |= BOTTOM_MB_POS;

    pList[iIdx].sMv                     = pLayerMvUnitBlock4x4[iIdx];
    pList[iIdx].pRefIndex               = pLayerRefIndexBlock8x8[iIdx];
    pList[iIdx].pSadCost                = &pEnc->pSadCostMb[iIdx];
    pList[iIdx].pIntra4x4PredMode       = &pEnc->pIntra4x4PredModeBlocks[iIdx * INTRA_4x4_MODE_NUM];
    pList[iIdx].pNonZeroCount           = &pEnc->pNonZeroCountBlocks[iIdx * MB_LUMA_CHROMA_BLOCK4x4_NUM];
  }
}


int32_t   InitMbListD (sWelsEncCtx** ppCtx) {
  int32_t iNumDlayer = (*ppCtx)->pSvcParam->iSpatialLayerNum;
  int32_t iMbSize[MAX_DEPENDENCY_LAYER] = { 0 };
  int32_t iOverallMbNum = 0;
  int32_t iMbWidth = 0;
  int32_t iMbHeight = 0;
  int32_t i;

  if (iNumDlayer > MAX_DEPENDENCY_LAYER)
    return 1;

  for (i = 0; i < iNumDlayer; i++) {
    iMbWidth = ((*ppCtx)->pSvcParam->sSpatialLayers[i].iVideoWidth + 15) >> 4;
    iMbHeight = ((*ppCtx)->pSvcParam->sSpatialLayers[i].iVideoHeight + 15) >> 4;
    iMbSize[i] = iMbWidth  * iMbHeight;
    iOverallMbNum += iMbSize[i];
  }

  (*ppCtx)->ppMbListD = static_cast<SMB**> ((*ppCtx)->pMemAlign->WelsMalloc (iNumDlayer * sizeof (SMB*), "ppMbListD"));
  (*ppCtx)->ppMbListD[0] = NULL;
  WELS_VERIFY_RETURN_PROC_IF (1, (*ppCtx)->ppMbListD == NULL, FreeMemorySvc (ppCtx));
  (*ppCtx)->ppMbListD[0] = static_cast<SMB*> ((*ppCtx)->pMemAlign->WelsMallocz (iOverallMbNum * sizeof (SMB),
                           "ppMbListD[0]"));
  WELS_VERIFY_RETURN_PROC_IF (1, (*ppCtx)->ppMbListD[0] == NULL, FreeMemorySvc (ppCtx));
  (*ppCtx)->ppDqLayerList[0]->sMbDataP = (*ppCtx)->ppMbListD[0];
  InitMbInfo (*ppCtx, (*ppCtx)->ppMbListD[0], (*ppCtx)->ppDqLayerList[0], 0, iMbSize[iNumDlayer - 1]);
  for (i = 1; i < iNumDlayer; i++) {
    (*ppCtx)->ppMbListD[i] = (*ppCtx)->ppMbListD[i - 1] + iMbSize[i - 1];
    (*ppCtx)->ppDqLayerList[i]->sMbDataP = (*ppCtx)->ppMbListD[i];
    InitMbInfo (*ppCtx, (*ppCtx)->ppMbListD[i], (*ppCtx)->ppDqLayerList[i], i, iMbSize[iNumDlayer - 1]);
  }

  return 0;
}

int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa) {
  pMbCache->pCoeffLevel = (int16_t*)pMa->WelsMalloc (MB_COEFF_LIST_SIZE * sizeof (int16_t), "pMbCache->pCoeffLevel");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pCoeffLevel));
  pMbCache->pMemPredMb = (uint8_t*)pMa->WelsMalloc (2 * 256 * sizeof (uint8_t), "pMbCache->pMemPredMb");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredMb));
  pMbCache->pSkipMb = (uint8_t*)pMa->WelsMalloc (384 * sizeof (uint8_t), "pMbCache->pSkipMb");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pSkipMb));
  pMbCache->pMemPredBlk4 = (uint8_t*)pMa->WelsMalloc (2 * 16 * sizeof (uint8_t), "pMbCache->pMemPredBlk4");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredBlk4));
  pMbCache->pBufferInterPredMe = (uint8_t*)pMa->WelsMalloc (4 * 640 * sizeof (uint8_t), "pMbCache->pBufferInterPredMe");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pBufferInterPredMe));
  pMbCache->pPrevIntra4x4PredModeFlag = (bool*)pMa->WelsMalloc (16 * sizeof (bool),
                                        "pMbCache->pPrevIntra4x4PredModeFlag");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pPrevIntra4x4PredModeFlag));
  pMbCache->pRemIntra4x4PredModeFlag = (int8_t*)pMa->WelsMalloc (16 * sizeof (int8_t),
                                        "pMbCache->pRemIntra4x4PredModeFlag");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pRemIntra4x4PredModeFlag));
  pMbCache->pDct = (SDCTCoeff*)pMa->WelsMalloc (sizeof (SDCTCoeff), "pMbCache->pDct");
  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pDct));
  return 0;
}

void FreeMbCache (SMbCache* pMbCache, CMemoryAlign* pMa) {
  if (NULL != pMbCache->pCoeffLevel) {
    pMa->WelsFree (pMbCache->pCoeffLevel, "pMbCache->pCoeffLevel");
    pMbCache->pCoeffLevel = NULL;
  }
  if (NULL != pMbCache->pMemPredMb) {
    pMa->WelsFree (pMbCache->pMemPredMb, "pMbCache->pMemPredMb");
    pMbCache->pMemPredMb = NULL;
  }
  if (NULL != pMbCache->pSkipMb) {
    pMa->WelsFree (pMbCache->pSkipMb, "pMbCache->pSkipMb");
    pMbCache->pSkipMb = NULL;
  }
  if (NULL != pMbCache->pMemPredBlk4) {
    pMa->WelsFree (pMbCache->pMemPredBlk4, "pMbCache->pMemPredBlk4");
    pMbCache->pMemPredBlk4 = NULL;
  }
  if (NULL != pMbCache->pBufferInterPredMe) {
    pMa->WelsFree (pMbCache->pBufferInterPredMe, "pMbCache->pBufferInterPredMe");
    pMbCache->pBufferInterPredMe = NULL;
  }
  if (NULL != pMbCache->pPrevIntra4x4PredModeFlag) {
    pMa->WelsFree (pMbCache->pPrevIntra4x4PredModeFlag, "pMbCache->pPrevIntra4x4PredModeFlag");
    pMbCache->pPrevIntra4x4PredModeFlag = NULL;
  }
  if (NULL != pMbCache->pRemIntra4x4PredModeFlag) {
    pMa->WelsFree (pMbCache->pRemIntra4x4PredModeFlag, "pMbCache->pRemIntra4x4PredModeFlag");
    pMbCache->pRemIntra4x4PredModeFlag = NULL;
  }
  if (NULL != pMbCache->pDct) {
    pMa->WelsFree (pMbCache->pDct, "pMbCache->pDct");
    pMbCache->pDct = NULL;
  }
}

static int32_t WelsGenerateNewSps (sWelsEncCtx* pCtx, const bool kbUseSubsetSps, const int32_t iDlayerIndex,
                                   const int32_t iDlayerCount, const int32_t kiSpsId,
                                   SWelsSPS*& pSps, SSubsetSps*& pSubsetSps, bool bSVCBaselayer) {
  int32_t iRet = 0;

  if (!kbUseSubsetSps) {
    pSps        = & (pCtx->pSpsArray[kiSpsId]);
  } else {
    pSubsetSps  = & (pCtx->pSubsetArray[kiSpsId]);
    pSps        = &pSubsetSps->pSps;
  }

  SWelsSvcCodingParam* pParam = pCtx->pSvcParam;
  SSpatialLayerConfig* pDlayerParam = &pParam->sSpatialLayers[iDlayerIndex];
  // Need port pSps/pPps initialization due to spatial scalability changed
  if (!kbUseSubsetSps) {
    iRet = WelsInitSps (pSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod,
                        pParam->iMaxNumRefFrame,
                        kiSpsId, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount,
                        bSVCBaselayer);
  } else {
    iRet = WelsInitSubsetSps (pSubsetSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod,
                              pParam->iMaxNumRefFrame,
                              kiSpsId, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount);
  }
  return iRet;
}

static bool CheckMatchedSps (SWelsSPS* const pSps1, SWelsSPS* const pSps2) {

  if ((pSps1->iMbWidth != pSps2->iMbWidth)
      || (pSps1->iMbHeight != pSps2->iMbHeight)) {
    return false;
  }

  if ((pSps1->uiLog2MaxFrameNum != pSps2->uiLog2MaxFrameNum)
      || (pSps1->iLog2MaxPocLsb != pSps2->iLog2MaxPocLsb)) {
    return false;
  }

  if (pSps1->iNumRefFrames != pSps2->iNumRefFrames) {
    return false;
  }

  if ((pSps1->bFrameCroppingFlag != pSps2->bFrameCroppingFlag)
      || (pSps1->sFrameCrop.iCropLeft != pSps2->sFrameCrop.iCropLeft)
      || (pSps1->sFrameCrop.iCropRight != pSps2->sFrameCrop.iCropRight)
      || (pSps1->sFrameCrop.iCropTop != pSps2->sFrameCrop.iCropTop)
      || (pSps1->sFrameCrop.iCropBottom != pSps2->sFrameCrop.iCropBottom)
     ) {
    return false;
  }

  if ((pSps1->uiProfileIdc != pSps2->uiProfileIdc)
      || (pSps1->bConstraintSet0Flag != pSps2->bConstraintSet0Flag)
      || (pSps1->bConstraintSet1Flag != pSps2->bConstraintSet1Flag)
      || (pSps1->bConstraintSet2Flag != pSps2->bConstraintSet2Flag)
      || (pSps1->bConstraintSet3Flag != pSps2->bConstraintSet3Flag)
      || (pSps1->iLevelIdc != pSps2->iLevelIdc)) {
    return false;
  }

  return true;
}

static bool CheckMatchedSubsetSps (SSubsetSps* const pSubsetSps1, SSubsetSps* const pSubsetSps2) {
  if (!CheckMatchedSps (&pSubsetSps1->pSps, &pSubsetSps2->pSps)) {
    return false;
  }

  if ((pSubsetSps1->sSpsSvcExt.iExtendedSpatialScalability      != pSubsetSps2->sSpsSvcExt.iExtendedSpatialScalability)
      || (pSubsetSps1->sSpsSvcExt.bAdaptiveTcoeffLevelPredFlag  != pSubsetSps2->sSpsSvcExt.bAdaptiveTcoeffLevelPredFlag)
      || (pSubsetSps1->sSpsSvcExt.bSeqTcoeffLevelPredFlag != pSubsetSps2->sSpsSvcExt.bSeqTcoeffLevelPredFlag)
      || (pSubsetSps1->sSpsSvcExt.bSliceHeaderRestrictionFlag != pSubsetSps2->sSpsSvcExt.bSliceHeaderRestrictionFlag)) {
    return false;
  }

  return true;
}

int32_t FindExistingSps (SWelsSvcCodingParam* pParam, const bool kbUseSubsetSps, const int32_t iDlayerIndex,
                         const int32_t iDlayerCount, const int32_t iSpsNumInUse,
                         SWelsSPS* pSpsArray,
                         SSubsetSps* pSubsetArray, bool bSVCBaseLayer) {
  SSpatialLayerConfig* pDlayerParam = &pParam->sSpatialLayers[iDlayerIndex];

  assert (iSpsNumInUse <= MAX_SPS_COUNT);
  if (!kbUseSubsetSps) {
    SWelsSPS sTmpSps;
    WelsInitSps (&sTmpSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod,
                 pParam->iMaxNumRefFrame,
                 0, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount,
                 bSVCBaseLayer);
    for (int32_t iId = 0; iId < iSpsNumInUse; iId++) {
      if (CheckMatchedSps (&sTmpSps, &pSpsArray[iId])) {
        return iId;
      }
    }
  } else {
    SSubsetSps sTmpSubsetSps;
    WelsInitSubsetSps (&sTmpSubsetSps, pDlayerParam, &pParam->sDependencyLayers[iDlayerIndex], pParam->uiIntraPeriod,
                       pParam->iMaxNumRefFrame,
                       0, pParam->bEnableFrameCroppingFlag, pParam->iRCMode != RC_OFF_MODE, iDlayerCount);

    for (int32_t iId = 0; iId < iSpsNumInUse; iId++) {
      if (CheckMatchedSubsetSps (&sTmpSubsetSps, &pSubsetArray[iId])) {
        return iId;
      }
    }
  }

  return INVALID_ID;
}

int32_t FindExistingPps (SWelsSPS* pSps, SSubsetSps* pSubsetSps, const bool kbUseSubsetSps, const int32_t iSpsId,
                         const bool kbEntropyCodingFlag, const int32_t iPpsNumInUse,
                         SWelsPPS* pPpsArray) {
#if !defined(DISABLE_FMO_FEATURE)
  // feature not supported yet
  return INVALID_ID;
#endif//!DISABLE_FMO_FEATURE

  SWelsPPS sTmpPps;
  WelsInitPps (&sTmpPps,
               pSps,
               pSubsetSps,
               0,
               true,
               kbUseSubsetSps,
               kbEntropyCodingFlag);

  assert (iPpsNumInUse <= MAX_PPS_COUNT);
  for (int32_t iId = 0; iId < iPpsNumInUse; iId++) {
    if ((sTmpPps.iSpsId == pPpsArray[iId].iSpsId)
        && (sTmpPps.bEntropyCodingModeFlag == pPpsArray[iId].bEntropyCodingModeFlag)
        && (sTmpPps.iPicInitQp == pPpsArray[iId].iPicInitQp)
        && (sTmpPps.iPicInitQs == pPpsArray[iId].iPicInitQs)
        && (sTmpPps.uiChromaQpIndexOffset == pPpsArray[iId].uiChromaQpIndexOffset)
        && (sTmpPps.bDeblockingFilterControlPresentFlag == pPpsArray[iId].bDeblockingFilterControlPresentFlag)
       ) {
      return iId;
    }
  }

  return INVALID_ID;
}

static inline int32_t InitpSliceInLayer (sWelsEncCtx** ppCtx, SDqLayer* pDqLayer, CMemoryAlign* pMa,
    const int32_t iMaxSliceNum, bool bMultithread) {
  int32_t iSliceIdx = 0;
  while (iSliceIdx < iMaxSliceNum) {
    SSlice* pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx];
    pSlice->uiSliceIdx = iSliceIdx;
    if (bMultithread)
      pSlice->pSliceBsa = & (*ppCtx)->pSliceBs[iSliceIdx].sBsWrite;
    else
      pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite;
    if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) {
      FreeMemorySvc (ppCtx);
      return ENC_RETURN_MEMALLOCERR;
    }
    ++ iSliceIdx;
  }
  return ENC_RETURN_SUCCESS;
}

/*!
 * \brief   initialize ppDqLayerList and slicepEncCtx_list due to count number of layers available
 * \pParam  pCtx            sWelsEncCtx*
 * \return  0 - successful; otherwise failed
 */
static inline int32_t InitDqLayers (sWelsEncCtx** ppCtx, SExistingParasetList* pExistingParasetList) {
  SWelsSvcCodingParam* pParam   = NULL;
  SWelsSPS* pSps                = NULL;
  SSubsetSps* pSubsetSps        = NULL;
  SWelsPPS* pPps                = NULL;
  CMemoryAlign* pMa             = NULL;
  int32_t iDlayerCount          = 0;
  int32_t iDlayerIndex          = 0;
  uint32_t iSpsId               = 0;
  uint32_t iPpsId               = 0;
  uint32_t iNumRef              = 0;
  int32_t iResult               = 0;

  if (NULL == ppCtx || NULL == *ppCtx)
    return 1;

  pMa           = (*ppCtx)->pMemAlign;
  pParam        = (*ppCtx)->pSvcParam;
  iDlayerCount  = pParam->iSpatialLayerNum;
  iNumRef       = pParam->iMaxNumRefFrame;

  const int32_t kiFeatureStrategyIndex = FME_DEFAULT_FEATURE_INDEX;
  const int32_t kiMe16x16 = ME_DIA_CROSS;
  const int32_t kiMe8x8 = ME_DIA_CROSS_FME;
  const int32_t kiNeedFeatureStorage = (pParam->iUsageType != SCREEN_CONTENT_REAL_TIME) ? 0 :
                                       ((kiFeatureStrategyIndex << 16) + ((kiMe16x16 & 0x00FF) << 8) + (kiMe8x8 & 0x00FF));

  iDlayerIndex = 0;
  while (iDlayerIndex < iDlayerCount) {
    SRefList* pRefList          = NULL;
    uint32_t i                  = 0;
    const int32_t kiWidth       = pParam->sSpatialLayers[iDlayerIndex].iVideoWidth;
    const int32_t kiHeight      = pParam->sSpatialLayers[iDlayerIndex].iVideoHeight;
    int32_t iPicWidth           = WELS_ALIGN (kiWidth, MB_WIDTH_LUMA) + (PADDING_LENGTH << 1);  // with iWidth of horizon
    int32_t iPicChromaWidth     = iPicWidth >> 1;

    iPicWidth = WELS_ALIGN (iPicWidth, 32); // 32(or 16 for chroma below) to match original imp. here instead of iCacheLineSize
    iPicChromaWidth = WELS_ALIGN (iPicChromaWidth, 16);

    WelsGetEncBlockStrideOffset ((*ppCtx)->pStrideTab->pStrideEncBlockOffset[iDlayerIndex], iPicWidth, iPicChromaWidth);

    // pRef list
    pRefList = (SRefList*)pMa->WelsMallocz (sizeof (SRefList), "pRefList");
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList), FreeMemorySvc (ppCtx))
    do {
      pRefList->pRef[i] = AllocPicture (pMa, kiWidth, kiHeight, true,
                                        (iDlayerIndex == iDlayerCount - 1) ? kiNeedFeatureStorage : 0); // to use actual size of current layer
      WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList->pRef[i]), FreeMemorySvc (ppCtx))
      ++ i;
    } while (i < 1 + iNumRef);

    pRefList->pNextBuffer = pRefList->pRef[0];
    (*ppCtx)->ppRefPicListExt[iDlayerIndex] = pRefList;
    ++ iDlayerIndex;
  }

  iDlayerIndex = 0;
  while (iDlayerIndex < iDlayerCount) {
    SDqLayer* pDqLayer              = NULL;
    SSpatialLayerConfig* pDlayer    = &pParam->sSpatialLayers[iDlayerIndex];
    const int32_t kiMbW             = (pDlayer->iVideoWidth + 0x0f) >> 4;
    const int32_t kiMbH             = (pDlayer->iVideoHeight + 0x0f) >> 4;
    int32_t iMaxSliceNum            = 1;
    const int32_t kiSliceNum = GetInitialSliceNum (kiMbW, kiMbH, &pDlayer->sSliceArgument);
    if (iMaxSliceNum < kiSliceNum)
      iMaxSliceNum = kiSliceNum;

    // pDq layers list
    pDqLayer = (SDqLayer*)pMa->WelsMallocz (sizeof (SDqLayer), "pDqLayer");
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer), FreeMemorySvc (ppCtx))

    // for dynamic slicing mode
    if (SM_SIZELIMITED_SLICE == pDlayer->sSliceArgument.uiSliceMode) {
      const int32_t iSize                       = pParam->iCountThreadsNum * sizeof (int32_t);

      pDqLayer->pNumSliceCodedOfPartition       = (int32_t*)pMa->WelsMallocz (iSize, "pNumSliceCodedOfPartition");
      pDqLayer->pLastCodedMbIdxOfPartition      = (int32_t*)pMa->WelsMallocz (iSize, "pLastCodedMbIdxOfPartition");
      pDqLayer->pLastMbIdxOfPartition           = (int32_t*)pMa->WelsMallocz (iSize, "pLastMbIdxOfPartition");

      WELS_VERIFY_RETURN_PROC_IF (1,
                                  (NULL == pDqLayer->pNumSliceCodedOfPartition ||
                                   NULL == pDqLayer->pLastCodedMbIdxOfPartition ||
                                   NULL == pDqLayer->pLastMbIdxOfPartition),
                                  FreeMemorySvc (ppCtx))
    }

    pDqLayer->iMbWidth  = kiMbW;
    pDqLayer->iMbHeight = kiMbH;
    {
      pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer");
      WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer->sLayerInfo.pSliceInLayer), FreeMemorySvc (ppCtx))

      int32_t iReturn = InitpSliceInLayer (ppCtx, pDqLayer, pMa, iMaxSliceNum, pParam->iMultipleThreadIdc > 1);
      WELS_VERIFY_RETURN_PROC_IF (1, (ENC_RETURN_SUCCESS != iReturn), FreeMemorySvc (ppCtx))
    }

    //deblocking parameters initialization
    //target-layer deblocking
    pDqLayer->iLoopFilterDisableIdc     = pParam->iLoopFilterDisableIdc;
    pDqLayer->iLoopFilterAlphaC0Offset  = (pParam->iLoopFilterAlphaC0Offset) << 1;
    pDqLayer->iLoopFilterBetaOffset     = (pParam->iLoopFilterBetaOffset) << 1;
    //parallel deblocking
    pDqLayer->bDeblockingParallelFlag   = pParam->bDeblockingParallelFlag;

    //deblocking parameter adjustment
    if (SM_SINGLE_SLICE == pDlayer->sSliceArgument.uiSliceMode) {
      //iLoopFilterDisableIdc: will be 0 or 1 under single_slice
      if (2 == pParam->iLoopFilterDisableIdc) {
        pDqLayer->iLoopFilterDisableIdc = 0;
      }
      //bDeblockingParallelFlag
      pDqLayer->bDeblockingParallelFlag = false;
    } else {
      //multi-pSlice
      if (0 == pDqLayer->iLoopFilterDisableIdc) {
        pDqLayer->bDeblockingParallelFlag = false;
      }
    }

    //
    if (kiNeedFeatureStorage && iDlayerIndex == iDlayerCount - 1) {
      pDqLayer->pFeatureSearchPreparation = static_cast<SFeatureSearchPreparation*> (pMa->WelsMallocz (sizeof (
                                              SFeatureSearchPreparation), "pFeatureSearchPreparation"));
      WELS_VERIFY_RETURN_PROC_IF (1, NULL == pDqLayer->pFeatureSearchPreparation, FreeMemorySvc (ppCtx));
      int32_t iReturn = RequestFeatureSearchPreparation (pMa, pDlayer->iVideoWidth, pDlayer->iVideoHeight,
                        kiNeedFeatureStorage,
                        pDqLayer->pFeatureSearchPreparation);
      WELS_VERIFY_RETURN_PROC_IF (1, ENC_RETURN_SUCCESS != iReturn, FreeMemorySvc (ppCtx));
    } else {
      pDqLayer->pFeatureSearchPreparation = NULL;
    }

    (*ppCtx)->ppDqLayerList[iDlayerIndex] = pDqLayer;

    ++ iDlayerIndex;
  }

  // for dynamically malloc for parameter sets memory instead of maximal items for standard to reduce size, 3/18/2010
  const int32_t kiNeededSpsNum = (*ppCtx)->GetNeededSpsNum();
  const int32_t kiNeededSubsetSpsNum = (*ppCtx)->GetNeededSubsetSpsNum();
  (*ppCtx)->pSpsArray = (SWelsSPS*)pMa->WelsMalloc (kiNeededSpsNum * sizeof (SWelsSPS), "pSpsArray");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSpsArray), FreeMemorySvc (ppCtx))
  if (kiNeededSubsetSpsNum > 0) {
    (*ppCtx)->pSubsetArray = (SSubsetSps*)pMa->WelsMalloc (kiNeededSubsetSpsNum * sizeof (SSubsetSps), "pSubsetArray");
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSubsetArray), FreeMemorySvc (ppCtx))
  } else {
    (*ppCtx)->pSubsetArray = NULL;
  }

  if ((SPS_LISTING & pParam->eSpsPpsIdStrategy) && (NULL != pExistingParasetList)) {
    (*ppCtx)->sPSOVector.uiInUseSpsNum = pExistingParasetList->uiInUseSpsNum;
    memcpy ((*ppCtx)->pSpsArray, pExistingParasetList->sSps, MAX_SPS_COUNT * sizeof (SWelsSPS));

    if (kiNeededSubsetSpsNum > 0) {
      (*ppCtx)->sPSOVector.uiInUseSubsetSpsNum = pExistingParasetList->uiInUseSubsetSpsNum;
      memcpy ((*ppCtx)->pSubsetArray, pExistingParasetList->sSubsetSps, MAX_SPS_COUNT * sizeof (SSubsetSps));
    } else {
      (*ppCtx)->sPSOVector.uiInUseSubsetSpsNum = 0;
    }
  }

  // PPS
  const int32_t kiNeededPpsNum = (*ppCtx)->GetNeededPpsNum();
  (*ppCtx)->pPPSArray = (SWelsPPS*)pMa->WelsMalloc (kiNeededPpsNum * sizeof (SWelsPPS), "pPPSArray");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pPPSArray), FreeMemorySvc (ppCtx))

  // copy from existing if the pointer exists
  if ((SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy) && (NULL != pExistingParasetList)) {
    (*ppCtx)->sPSOVector.uiInUsePpsNum = pExistingParasetList->uiInUsePpsNum;
    memcpy ((*ppCtx)->pPPSArray, pExistingParasetList->sPps, MAX_PPS_COUNT * sizeof (SWelsPPS));
  }


  if (INCREASING_ID & pParam->eSpsPpsIdStrategy) {
    (*ppCtx)->pPSOVector = & ((*ppCtx)->sPSOVector);
  } else {
    (*ppCtx)->pPSOVector = NULL;
  }

  (*ppCtx)->pDqIdcMap = (SDqIdc*)pMa->WelsMallocz (iDlayerCount * sizeof (SDqIdc), "pDqIdcMap");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pDqIdcMap), FreeMemorySvc (ppCtx))

  iDlayerIndex = 0;
  while (iDlayerIndex < iDlayerCount) {
    SDqIdc* pDqIdc                      = & (*ppCtx)->pDqIdcMap[iDlayerIndex];
    const bool bUseSubsetSps            = (!pParam->bSimulcastAVC) && (iDlayerIndex > BASE_DEPENDENCY_ID);
    SSpatialLayerConfig* pDlayerParam   = &pParam->sSpatialLayers[iDlayerIndex];
    bool bSvcBaselayer = (!pParam->bSimulcastAVC) && (iDlayerCount > BASE_DEPENDENCY_ID)
                         && (iDlayerIndex == BASE_DEPENDENCY_ID);
    pDqIdc->uiSpatialId = iDlayerIndex;

    if (! (SPS_LISTING & pParam->eSpsPpsIdStrategy)) {
      WelsGenerateNewSps (*ppCtx, bUseSubsetSps, iDlayerIndex,
                          iDlayerCount, iSpsId, pSps, pSubsetSps, bSvcBaselayer);
    } else {
      //SPS_LISTING_AND_PPS_INCREASING == pParam->eSpsPpsIdStrategy
      //check if the current param can fit in an existing SPS
      const int32_t kiFoundSpsId = FindExistingSps ((*ppCtx)->pSvcParam, bUseSubsetSps, iDlayerIndex, iDlayerCount,
                                   bUseSubsetSps ? ((*ppCtx)->sPSOVector.uiInUseSubsetSpsNum) : ((*ppCtx)->sPSOVector.uiInUseSpsNum),
                                   (*ppCtx)->pSpsArray,
                                   (*ppCtx)->pSubsetArray, bSvcBaselayer);


      if (INVALID_ID != kiFoundSpsId) {
        //if yes, set pSps or pSubsetSps to it
        iSpsId = kiFoundSpsId;
        if (!bUseSubsetSps) {
          pSps = & ((*ppCtx)->pSpsArray[kiFoundSpsId]);
        } else {
          pSubsetSps = & ((*ppCtx)->pSubsetArray[kiFoundSpsId]);
        }
      } else {
        //if no, generate a new SPS as usual
        if ((SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy) && (MAX_PPS_COUNT <= (*ppCtx)->sPSOVector.uiInUsePpsNum)) {
          //check if we can generate new SPS or not
          WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
                   "InitDqLayers(), cannot generate new SPS under the SPS_PPS_LISTING mode!");
          return ENC_RETURN_UNSUPPORTED_PARA;
        }

        iSpsId = (!bUseSubsetSps) ? ((*ppCtx)->sPSOVector.uiInUseSpsNum++) : ((*ppCtx)->sPSOVector.uiInUseSubsetSpsNum++);
        if (iSpsId >= MAX_SPS_COUNT) {
          if (SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy) {
            WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
                     "InitDqLayers(), cannot generate new SPS under the SPS_PPS_LISTING mode!");
            return ENC_RETURN_UNSUPPORTED_PARA;
          }
          // reset current list
          if (!bUseSubsetSps) {
            (*ppCtx)->sPSOVector.uiInUseSpsNum = 1;
            memset ((*ppCtx)->pSpsArray, 0, MAX_SPS_COUNT * sizeof (SWelsSPS));
          } else {
            (*ppCtx)->sPSOVector.uiInUseSubsetSpsNum = 1;
            memset ((*ppCtx)->pSubsetArray, 0, MAX_SPS_COUNT * sizeof (SSubsetSps));
          }
          iSpsId = 0;
        }

        WelsGenerateNewSps (*ppCtx, bUseSubsetSps, iDlayerIndex,
                            iDlayerCount, iSpsId, pSps, pSubsetSps, bSvcBaselayer);
      }
    }

    if (! (SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy)) {
      pPps = & (*ppCtx)->pPPSArray[iPpsId];
      // initialize pPps
      WelsInitPps (pPps, pSps, pSubsetSps, iPpsId, true, bUseSubsetSps, pParam->iEntropyCodingModeFlag != 0);
    } else {
      const int32_t kiFoundPpsId = FindExistingPps (pSps, pSubsetSps, bUseSubsetSps, iSpsId,
                                   pParam->iEntropyCodingModeFlag != 0,
                                   (*ppCtx)->sPSOVector.uiInUsePpsNum,
                                   (*ppCtx)->pPPSArray);


      if (INVALID_ID != kiFoundPpsId) {
        //if yes, set pPps to it
        iPpsId = kiFoundPpsId;
        pPps = & ((*ppCtx)->pPPSArray[kiFoundPpsId]);
      } else {
        iPpsId = ((*ppCtx)->sPSOVector.uiInUsePpsNum++);
        pPps    = & (*ppCtx)->pPPSArray[iPpsId];
        WelsInitPps (pPps, pSps, pSubsetSps, iPpsId, true, bUseSubsetSps, pParam->iEntropyCodingModeFlag != 0);
      }
    }

    // Not using FMO in SVC coding so far, come back if need FMO
    {
      iResult = InitSlicePEncCtx (& (*ppCtx)->pSliceCtxList[iDlayerIndex],
                                  (*ppCtx)->pMemAlign,
                                  false,
                                  pSps->iMbWidth,
                                  pSps->iMbHeight,
                                  & (pDlayerParam->sSliceArgument),
                                  pPps);
      if (iResult) {
        WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "InitDqLayers(), InitSlicePEncCtx failed(%d)!", iResult);
        FreeMemorySvc (ppCtx);
        return iResult;
      }
      (*ppCtx)->ppDqLayerList[iDlayerIndex]->pSliceEncCtx = & (*ppCtx)->pSliceCtxList[iDlayerIndex];
    }
    pDqIdc->iSpsId = iSpsId;
    pDqIdc->iPpsId = iPpsId;

    (*ppCtx)->sPSOVector.bPpsIdMappingIntoSubsetsps[iPpsId] = bUseSubsetSps;

    if ((pParam->bSimulcastAVC) || (bUseSubsetSps))
      ++ iSpsId;
    ++ iPpsId;
    if (bUseSubsetSps) {
      ++ (*ppCtx)->iSubsetSpsNum;
    } else {
      ++ (*ppCtx)->iSpsNum;
    }
    ++ (*ppCtx)->iPpsNum;

    ++ iDlayerIndex;
  }
  if (SPS_LISTING & pParam->eSpsPpsIdStrategy) {
    (*ppCtx)->iSpsNum = (*ppCtx)->sPSOVector.uiInUseSpsNum;
    (*ppCtx)->iSubsetSpsNum = (*ppCtx)->sPSOVector.uiInUseSubsetSpsNum;
  }
  if (SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy) {
    (*ppCtx)->iPpsNum = (*ppCtx)->sPSOVector.uiInUsePpsNum;
  }
  return ENC_RETURN_SUCCESS;
}

int32_t AllocStrideTables (sWelsEncCtx** ppCtx, const int32_t kiNumSpatialLayers) {
  CMemoryAlign* pMa             = (*ppCtx)->pMemAlign;
  SWelsSvcCodingParam* pParam   = (*ppCtx)->pSvcParam;
  SStrideTables* pPtr           = NULL;
  int16_t* pTmpRow              = NULL, *pRowX = NULL, *pRowY = NULL, *p = NULL;
  uint8_t* pBase                = NULL;
  uint8_t* pBaseDec = NULL, *pBaseEnc = NULL, *pBaseMbX = NULL, *pBaseMbY = NULL;
  struct {
    int32_t iMbWidth;
    int32_t iCountMbNum;                // count number of SMB in each spatial
    int32_t iSizeAllMbAlignCache;       // cache line size aligned in each spatial
  } sMbSizeMap[MAX_DEPENDENCY_LAYER] = {{ 0 }};
  int32_t iLineSizeY[MAX_DEPENDENCY_LAYER][2] = {{ 0 }};
  int32_t iLineSizeUV[MAX_DEPENDENCY_LAYER][2] = {{ 0 }};
  int32_t iMapSpatialIdx[MAX_DEPENDENCY_LAYER][2] = {{ 0 }};
  int32_t iSizeDec              = 0;
  int32_t iSizeEnc              = 0;
  int32_t iCountLayersNeedCs[2] = {0};
  const int32_t kiUnit1Size = 24 * sizeof (int32_t);
  int32_t iUnit2Size            = 0;
  int32_t iNeedAllocSize        = 0;
  int32_t iRowSize              = 0;
  int16_t iMaxMbWidth           = 0;
  int16_t iMaxMbHeight          = 0;
  int32_t i                     = 0;
  int32_t iSpatialIdx           = 0;
  int32_t iTemporalIdx          = 0;
  int32_t iCntTid               = 0;

  if (kiNumSpatialLayers <= 0 || kiNumSpatialLayers > MAX_DEPENDENCY_LAYER)
    return 1;

  pPtr = (SStrideTables*)pMa->WelsMalloc (sizeof (SStrideTables), "SStrideTables");
  if (NULL == pPtr)
    return 1;
  (*ppCtx)->pStrideTab = pPtr;

  iCntTid = pParam->iTemporalLayerNum > 1 ? 2 : 1;

  iSpatialIdx = 0;
  while (iSpatialIdx < kiNumSpatialLayers) {
    const int32_t kiTmpWidth = (pParam->sSpatialLayers[iSpatialIdx].iVideoWidth + 15) >> 4;
    const int32_t kiTmpHeight = (pParam->sSpatialLayers[iSpatialIdx].iVideoHeight + 15) >> 4;
    int32_t iNumMb = kiTmpWidth * kiTmpHeight;

    sMbSizeMap[iSpatialIdx].iMbWidth    = kiTmpWidth;
    sMbSizeMap[iSpatialIdx].iCountMbNum = iNumMb;

    iNumMb *= sizeof (int16_t);
    sMbSizeMap[iSpatialIdx].iSizeAllMbAlignCache = iNumMb;
    iUnit2Size += iNumMb;

    ++ iSpatialIdx;
  }

  // Adaptive size_cs, size_fdec by implementation dependency
  iTemporalIdx = 0;
  while (iTemporalIdx < iCntTid) {
    const bool kbBaseTemporalFlag = (iTemporalIdx == 0);

    iSpatialIdx = 0;
    while (iSpatialIdx < kiNumSpatialLayers) {
      SSpatialLayerConfig* fDlp = &pParam->sSpatialLayers[iSpatialIdx];

      const int32_t kiWidthPad = WELS_ALIGN (fDlp->iVideoWidth, 16) + (PADDING_LENGTH << 1);
      iLineSizeY[iSpatialIdx][kbBaseTemporalFlag]  = WELS_ALIGN (kiWidthPad, 32);
      iLineSizeUV[iSpatialIdx][kbBaseTemporalFlag] = WELS_ALIGN ((kiWidthPad >> 1), 16);

      iMapSpatialIdx[iCountLayersNeedCs[kbBaseTemporalFlag]][kbBaseTemporalFlag] = iSpatialIdx;
      ++ iCountLayersNeedCs[kbBaseTemporalFlag];
      ++ iSpatialIdx;
    }
    ++ iTemporalIdx;
  }
  iSizeDec = kiUnit1Size * (iCountLayersNeedCs[0] + iCountLayersNeedCs[1]);
  iSizeEnc = kiUnit1Size * kiNumSpatialLayers;

  iNeedAllocSize = iSizeDec + iSizeEnc + (iUnit2Size << 1);

  pBase = (uint8_t*)pMa->WelsMalloc (iNeedAllocSize, "pBase");
  if (NULL == pBase) {
    return 1;
  }

  pBaseDec = pBase;                     // iCountLayersNeedCs
  pBaseEnc = pBaseDec + iSizeDec;       // iNumSpatialLayers
  pBaseMbX = pBaseEnc + iSizeEnc;       // iNumSpatialLayers
  pBaseMbY = pBaseMbX + iUnit2Size;     // iNumSpatialLayers

  iTemporalIdx = 0;
  while (iTemporalIdx < iCntTid) {
    const bool kbBaseTemporalFlag = (iTemporalIdx == 0);

    iSpatialIdx = 0;
    while (iSpatialIdx < iCountLayersNeedCs[kbBaseTemporalFlag]) {
      const int32_t kiActualSpatialIdx = iMapSpatialIdx[iSpatialIdx][kbBaseTemporalFlag];
      const int32_t kiLumaWidth        = iLineSizeY[kiActualSpatialIdx][kbBaseTemporalFlag];
      const int32_t kiChromaWidth      = iLineSizeUV[kiActualSpatialIdx][kbBaseTemporalFlag];

      WelsGetEncBlockStrideOffset ((int32_t*)pBaseDec, kiLumaWidth, kiChromaWidth);

      pPtr->pStrideDecBlockOffset[kiActualSpatialIdx][kbBaseTemporalFlag] = (int32_t*)pBaseDec;
      pBaseDec += kiUnit1Size;

      ++ iSpatialIdx;
    }
    ++ iTemporalIdx;
  }
  iTemporalIdx = 0;
  while (iTemporalIdx < iCntTid) {
    const bool kbBaseTemporalFlag = (iTemporalIdx == 0);

    iSpatialIdx = 0;
    while (iSpatialIdx < kiNumSpatialLayers) {
      int32_t iMatchIndex = 0;
      bool bInMap = false;
      bool bMatchFlag = false;

      i = 0;
      while (i < iCountLayersNeedCs[kbBaseTemporalFlag]) {
        const int32_t kiActualIdx = iMapSpatialIdx[i][kbBaseTemporalFlag];
        if (kiActualIdx == iSpatialIdx) {
          bInMap = true;
          break;
        }
        if (!bMatchFlag) {
          iMatchIndex = kiActualIdx;
          bMatchFlag = true;
        }
        ++ i;
      }

      if (bInMap) {
        ++ iSpatialIdx;
        continue;
      }

      // not in spatial map and assign match one to it
      pPtr->pStrideDecBlockOffset[iSpatialIdx][kbBaseTemporalFlag] =
        pPtr->pStrideDecBlockOffset[iMatchIndex][kbBaseTemporalFlag];

      ++ iSpatialIdx;
    }
    ++ iTemporalIdx;
  }

  iSpatialIdx = 0;
  while (iSpatialIdx < kiNumSpatialLayers) {
    const int32_t kiAllocMbSize = sMbSizeMap[iSpatialIdx].iSizeAllMbAlignCache;

    pPtr->pStrideEncBlockOffset[iSpatialIdx]    = (int32_t*)pBaseEnc;

    pPtr->pMbIndexX[iSpatialIdx]                = (int16_t*)pBaseMbX;
    pPtr->pMbIndexY[iSpatialIdx]                = (int16_t*)pBaseMbY;

    pBaseEnc += kiUnit1Size;
    pBaseMbX += kiAllocMbSize;
    pBaseMbY += kiAllocMbSize;

    ++ iSpatialIdx;
  }

  while (iSpatialIdx < MAX_DEPENDENCY_LAYER) {
    pPtr->pStrideDecBlockOffset[iSpatialIdx][0] = NULL;
    pPtr->pStrideDecBlockOffset[iSpatialIdx][1] = NULL;
    pPtr->pStrideEncBlockOffset[iSpatialIdx]    = NULL;
    pPtr->pMbIndexX[iSpatialIdx]                = NULL;
    pPtr->pMbIndexY[iSpatialIdx]                = NULL;

    ++ iSpatialIdx;
  }

  // initialize pMbIndexX and pMbIndexY tables as below

  iMaxMbWidth   = sMbSizeMap[kiNumSpatialLayers - 1].iMbWidth;
  iMaxMbWidth   = WELS_ALIGN (iMaxMbWidth, 4);  // 4 loops for int16_t required introduced as below
  iRowSize      = iMaxMbWidth * sizeof (int16_t);

  pTmpRow = (int16_t*)pMa->WelsMalloc (iRowSize, "pTmpRow");
  if (NULL == pTmpRow) {
    return 1;
  }
  pRowX = pTmpRow;
  pRowY = pRowX;
  // initialize pRowX & pRowY
  i = 0;
  p = pRowX;
  while (i < iMaxMbWidth) {
    *p          = i;
    * (p + 1)   = 1 + i;
    * (p + 2)   = 2 + i;
    * (p + 3)   = 3 + i;

    p += 4;
    i += 4;
  }

  iSpatialIdx = kiNumSpatialLayers;
  while (--iSpatialIdx >= 0) {
    int16_t* pMbIndexX = pPtr->pMbIndexX[iSpatialIdx];
    const int32_t kiMbWidth     = sMbSizeMap[iSpatialIdx].iMbWidth;
    const int32_t kiMbHeight    = sMbSizeMap[iSpatialIdx].iCountMbNum / kiMbWidth;
    const int32_t kiLineSize    = kiMbWidth * sizeof (int16_t);

    i = 0;
    while (i < kiMbHeight) {
      memcpy (pMbIndexX, pRowX, kiLineSize); // confirmed_safe_unsafe_usage

      pMbIndexX += kiMbWidth;
      ++ i;
    }
  }

  memset (pRowY, 0, iRowSize);
  iMaxMbHeight = sMbSizeMap[kiNumSpatialLayers - 1].iCountMbNum / sMbSizeMap[kiNumSpatialLayers - 1].iMbWidth;
  i = 0;
  for (;;) {
    ENFORCE_STACK_ALIGN_1D (int16_t, t, 4, 16)

    int32_t t32 = 0;
    int16_t j = 0;

    for (iSpatialIdx = kiNumSpatialLayers - 1; iSpatialIdx >= 0; -- iSpatialIdx) {
      const int32_t kiMbWidth  = sMbSizeMap[iSpatialIdx].iMbWidth;
      const int32_t kiMbHeight = sMbSizeMap[iSpatialIdx].iCountMbNum / kiMbWidth;
      const int32_t kiLineSize = kiMbWidth * sizeof (int16_t);
      int16_t* pMbIndexY = pPtr->pMbIndexY[iSpatialIdx] + i * kiMbWidth;

      if (i < kiMbHeight) {
        memcpy (pMbIndexY, pRowY, kiLineSize); // confirmed_safe_unsafe_usage
      }
    }
    ++ i;
    if (i >= iMaxMbHeight)
      break;

    t32 = i | (i << 16);
    ST32 (t  , t32);
    ST32 (t + 2, t32);

    p = pRowY;
    while (j < iMaxMbWidth) {
      ST64 (p, LD64 (t));

      p += 4;
      j += 4;
    }
  }

  pMa->WelsFree (pTmpRow, "pTmpRow");
  pTmpRow = NULL;

  return 0;
}
int32_t RequestMemoryVaaScreen (SVAAFrameInfo* pVaa,  CMemoryAlign* pMa,  const int32_t iNumRef,
                                const int32_t iCountMax8x8BNum) {
  SVAAFrameInfoExt* pVaaExt = static_cast<SVAAFrameInfoExt*> (pVaa);

  pVaaExt->pVaaBlockStaticIdc[0] = (static_cast<uint8_t*> (pMa->WelsMallocz (iNumRef * iCountMax8x8BNum * sizeof (
                                      uint8_t), "pVaa->pVaaBlockStaticIdc[0]")));
  if (NULL == pVaaExt->pVaaBlockStaticIdc[0]) {
    return 1;
  }

  for (int32_t idx = 1; idx < iNumRef; idx++) {
    pVaaExt->pVaaBlockStaticIdc[idx] = pVaaExt->pVaaBlockStaticIdc[idx - 1] + iCountMax8x8BNum;
  }
  return 0;
}
void ReleaseMemoryVaaScreen (SVAAFrameInfo* pVaa,  CMemoryAlign* pMa, const int32_t iNumRef) {
  SVAAFrameInfoExt* pVaaExt = static_cast<SVAAFrameInfoExt*> (pVaa);
  if (pVaaExt && pMa && pVaaExt->pVaaBlockStaticIdc[0]) {
    pMa->WelsFree (pVaaExt->pVaaBlockStaticIdc[0], "pVaa->pVaaBlockStaticIdc[0]");

    for (int32_t idx = 0; idx < iNumRef; idx++) {
      pVaaExt->pVaaBlockStaticIdc[idx] = NULL;
    }
  }
}
/*!
 * \brief   request specific memory for SVC
 * \pParam  pEncCtx     sWelsEncCtx*
 * \return  successful - 0; otherwise none 0 for failed
 */
void GetMvMvdRange (SWelsSvcCodingParam* pParam, int32_t& iMvRange, int32_t& iMvdRange) {
  ELevelIdc iMinLevelIdc = LEVEL_5_2;
  int32_t iMinMv = 0;
  int32_t iMaxMv = 0;
  int32_t iFixMvRange = pParam->iUsageType ? EXPANDED_MV_RANGE : CAMERA_STARTMV_RANGE;
  int32_t iFixMvdRange = (pParam->iUsageType ? EXPANDED_MVD_RANGE : ((pParam->iSpatialLayerNum == 1) ? CAMERA_MVD_RANGE :
                          CAMERA_HIGHLAYER_MVD_RANGE));
  for (int32_t iLayer = 0; iLayer < pParam->iSpatialLayerNum; iLayer++) {
    if (pParam->sSpatialLayers[iLayer].uiLevelIdc < iMinLevelIdc)
      iMinLevelIdc = pParam->sSpatialLayers[iLayer].uiLevelIdc;
  }
  iMinMv = (g_ksLevelLimits[iMinLevelIdc - 1].iMinVmv) >> 2;
  iMaxMv = (g_ksLevelLimits[iMinLevelIdc - 1].iMaxVmv) >> 2;

  iMvRange = WELS_MIN (WELS_ABS (iMinMv), iMaxMv);

  iMvRange = WELS_MIN (iMvRange, iFixMvRange);

  iMvdRange = (iMvRange + 1) << 1;

  iMvdRange = WELS_MIN (iMvdRange, iFixMvdRange);
}
int32_t RequestMemorySvc (sWelsEncCtx** ppCtx, SExistingParasetList* pExistingParasetList) {
  SWelsSvcCodingParam* pParam           = (*ppCtx)->pSvcParam;
  CMemoryAlign* pMa                     = (*ppCtx)->pMemAlign;
  SSpatialLayerConfig* pFinalSpatial    = NULL;
  int32_t iCountBsLen                   = 0;
  int32_t iCountNals                    = 0;
  int32_t iMaxPicWidth                  = 0;
  int32_t iMaxPicHeight                 = 0;
  int32_t iCountMaxMbNum                = 0;
  int32_t iIndex                        = 0;
  int32_t iCountLayers                  = 0;
  int32_t iResult                       = 0;
  float fCompressRatioThr               = .5f;
  const int32_t kiNumDependencyLayers   = pParam->iSpatialLayerNum;
  int32_t iVclLayersBsSizeCount         = 0;
  int32_t iNonVclLayersBsSizeCount      = 0;
  int32_t iTargetSpatialBsSize          = 0;

  if (kiNumDependencyLayers < 1 || kiNumDependencyLayers > MAX_DEPENDENCY_LAYER) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc() failed due to invalid iNumDependencyLayers(%d)!",
             kiNumDependencyLayers);
    FreeMemorySvc (ppCtx);
    return 1;
  }

  if (pParam->uiGopSize == 0 || (pParam->uiIntraPeriod && ((pParam->uiIntraPeriod % pParam->uiGopSize) != 0))) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING,
             "RequestMemorySvc() failed due to invalid uiIntraPeriod(%d) (=multipler of uiGopSize(%d)!",
             pParam->uiIntraPeriod, pParam->uiGopSize);
    FreeMemorySvc (ppCtx);
    return 1;
  }

  pFinalSpatial = &pParam->sSpatialLayers[kiNumDependencyLayers - 1];
  iMaxPicWidth  = pFinalSpatial->iVideoWidth;
  iMaxPicHeight = pFinalSpatial->iVideoHeight;
  iCountMaxMbNum = ((15 + iMaxPicWidth) >> 4) * ((15 + iMaxPicHeight) >> 4);

  iResult = AcquireLayersNals (ppCtx, pParam, &iCountLayers, &iCountNals);
  if (iResult) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AcquireLayersNals failed(%d)!", iResult);
    FreeMemorySvc (ppCtx);
    return 1;
  }

  const int32_t kiSpsSize = (*ppCtx)->GetNeededSpsNum() * SPS_BUFFER_SIZE;
  const int32_t kiPpsSize = (*ppCtx)->GetNeededPpsNum() * PPS_BUFFER_SIZE;
  iNonVclLayersBsSizeCount = SSEI_BUFFER_SIZE + kiSpsSize + kiPpsSize;

  bool    bDynamicSlice = false;
  uint32_t uiMaxSliceNumEstimation = 0;
  int32_t iSliceBufferSize = 0;
  int32_t iMaxSliceBufferSize = 0;
  int32_t iTotalLength = 0;
  int32_t iLayerBsSize = 0;
  iIndex = 0;
  while (iIndex < pParam->iSpatialLayerNum) {
    SSpatialLayerConfig* fDlp = &pParam->sSpatialLayers[iIndex];

    fCompressRatioThr = COMPRESS_RATIO_THR;

    iLayerBsSize = WELS_ROUND (((3 * fDlp->iVideoWidth * fDlp->iVideoHeight) >> 1) * fCompressRatioThr) +
                   MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
    iLayerBsSize = WELS_ALIGN (iLayerBsSize, 4); // 4 bytes alinged
    iVclLayersBsSizeCount += iLayerBsSize;

    SSliceArgument* pSliceArgument = & (fDlp->sSliceArgument);
    if (pSliceArgument->uiSliceMode == SM_SIZELIMITED_SLICE) {
      bDynamicSlice = true;
      uiMaxSliceNumEstimation = WELS_MIN (AVERSLICENUM_CONSTRAINT,
                                          (iLayerBsSize / pSliceArgument->uiSliceSizeConstraint) + 1);
      (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, (int) uiMaxSliceNumEstimation);
      iSliceBufferSize = (WELS_MAX(pSliceArgument->uiSliceSizeConstraint, iLayerBsSize/uiMaxSliceNumEstimation)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
    } else {
      (*ppCtx)->iMaxSliceCount = WELS_MAX ((*ppCtx)->iMaxSliceCount, (int) pSliceArgument->uiSliceNum);
      iSliceBufferSize = ((iLayerBsSize / pSliceArgument->uiSliceNum)<<1) + MAX_MACROBLOCK_SIZE_IN_BYTE_x2;
    }
    iMaxSliceBufferSize = WELS_MAX(iMaxSliceBufferSize, iSliceBufferSize);

    ++ iIndex;
  }
  iTargetSpatialBsSize = iLayerBsSize;
  iCountBsLen = iNonVclLayersBsSizeCount + iVclLayersBsSizeCount;

  iMaxSliceBufferSize = WELS_MIN (iMaxSliceBufferSize, iTargetSpatialBsSize);
  iTotalLength = (pParam->iMultipleThreadIdc == 1) ? iCountBsLen : (iCountBsLen + ((*ppCtx)->iMaxSliceCount - 1) *
                 iMaxSliceBufferSize);

  pParam->iNumRefFrame = WELS_CLIP3 (pParam->iNumRefFrame, MIN_REF_PIC_COUNT,
                                      (pParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA :
                                       MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN));

  // Output
  (*ppCtx)->pOut = (SWelsEncoderOutput*)pMa->WelsMalloc (sizeof (SWelsEncoderOutput), "SWelsEncoderOutput");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pOut), FreeMemorySvc (ppCtx))
  (*ppCtx)->pOut->pBsBuffer = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pOut->pBsBuffer");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pOut->pBsBuffer), FreeMemorySvc (ppCtx))
  (*ppCtx)->pOut->uiSize = iCountBsLen;
  (*ppCtx)->pOut->sNalList = (SWelsNalRaw*)pMa->WelsMalloc (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pOut->sNalList), FreeMemorySvc (ppCtx))
  (*ppCtx)->pOut->pNalLen = (int32_t*)pMa->WelsMallocz (iCountNals * sizeof (int32_t), "pOut->pNalLen");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pOut->pNalLen), FreeMemorySvc (ppCtx))
  (*ppCtx)->pOut->iCountNals    = iCountNals;
  (*ppCtx)->pOut->iNalIndex     = 0;
  (*ppCtx)->pOut->iLayerBsIndex = 0;


  (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iTotalLength, "pFrameBs");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx))
  (*ppCtx)->iFrameBsSize = iTotalLength;
  (*ppCtx)->iPosBsBuffer = 0;

  // for pSlice bs buffers
  if (pParam->iMultipleThreadIdc > 1 && RequestMtResource (ppCtx, pParam, iCountBsLen, iMaxSliceBufferSize, bDynamicSlice)) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMtResource failed!");
    FreeMemorySvc (ppCtx);
    return 1;
  }

  (*ppCtx)->pIntra4x4PredModeBlocks = static_cast<int8_t*>
                                      (pMa->WelsMallocz (iCountMaxMbNum * INTRA_4x4_MODE_NUM, "pIntra4x4PredModeBlocks"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pIntra4x4PredModeBlocks), FreeMemorySvc (ppCtx))

  (*ppCtx)->pNonZeroCountBlocks = static_cast<int8_t*>
                                  (pMa->WelsMallocz (iCountMaxMbNum * MB_LUMA_CHROMA_BLOCK4x4_NUM, "pNonZeroCountBlocks"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pNonZeroCountBlocks), FreeMemorySvc (ppCtx))

  (*ppCtx)->pMvUnitBlock4x4 = static_cast<SMVUnitXY*>
                              (pMa->WelsMallocz (iCountMaxMbNum * 2 * MB_BLOCK4x4_NUM * sizeof (SMVUnitXY), "pMvUnitBlock4x4"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pMvUnitBlock4x4), FreeMemorySvc (ppCtx))

  (*ppCtx)->pRefIndexBlock4x4 = static_cast<int8_t*>
                                (pMa->WelsMallocz (iCountMaxMbNum * 2 * MB_BLOCK8x8_NUM * sizeof (int8_t), "pRefIndexBlock4x4"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pRefIndexBlock4x4), FreeMemorySvc (ppCtx))

  (*ppCtx)->pSadCostMb = static_cast<int32_t*>
                          (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pSadCostMb"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSadCostMb), FreeMemorySvc (ppCtx))

  (*ppCtx)->bEncCurFrmAsIdrFlag = true;  // make sure first frame is IDR
  (*ppCtx)->iGlobalQp = 26;   // global qp in default

  (*ppCtx)->pLtr = (SLTRState*)pMa->WelsMalloc (kiNumDependencyLayers * sizeof (SLTRState), "SLTRState");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pLtr), FreeMemorySvc (ppCtx))
  int32_t i = 0;
  for (i = 0; i < kiNumDependencyLayers; i++) {
    ResetLtrState (& (*ppCtx)->pLtr[i]);
  }

  (*ppCtx)->ppRefPicListExt = (SRefList**)pMa->WelsMalloc (kiNumDependencyLayers * sizeof (SRefList*), "ppRefPicListExt");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->ppRefPicListExt), FreeMemorySvc (ppCtx))

  // pSlice context list
  (*ppCtx)->pSliceCtxList = (SSliceCtx*)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SSliceCtx), "pSliceCtxList");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceCtxList), FreeMemorySvc (ppCtx))

  (*ppCtx)->ppDqLayerList = (SDqLayer**)pMa->WelsMalloc (kiNumDependencyLayers * sizeof (SDqLayer*), "ppDqLayerList");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->ppDqLayerList), FreeMemorySvc (ppCtx))

  // stride tables
  if (AllocStrideTables (ppCtx, kiNumDependencyLayers)) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AllocStrideTables failed!");
    FreeMemorySvc (ppCtx);
    return 1;
  }

  //Rate control module memory allocation
  // only malloc once for RC pData, 12/14/2009
  (*ppCtx)->pWelsSvcRc = (SWelsSvcRc*)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SWelsSvcRc), "pWelsSvcRc");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pWelsSvcRc), FreeMemorySvc (ppCtx))
  //End of Rate control module memory allocation

  //pVaa memory allocation
  if (pParam->iUsageType == SCREEN_CONTENT_REAL_TIME) {
    (*ppCtx)->pVaa = (SVAAFrameInfoExt*)pMa->WelsMallocz (sizeof (SVAAFrameInfoExt), "pVaa");
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa), FreeMemorySvc (ppCtx))
    if (RequestMemoryVaaScreen ((*ppCtx)->pVaa, pMa, (*ppCtx)->pSvcParam->iMaxNumRefFrame, iCountMaxMbNum << 2)) {
      WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMemoryVaaScreen failed!");
      FreeMemorySvc (ppCtx);
      return 1;
    }
  } else {
    (*ppCtx)->pVaa = (SVAAFrameInfo*)pMa->WelsMallocz (sizeof (SVAAFrameInfo), "pVaa");
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa), FreeMemorySvc (ppCtx))
  }

  if ((*ppCtx)->pSvcParam->bEnableAdaptiveQuant) { //malloc mem
    (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureUnit   = static_cast<SMotionTextureUnit*>
        (pMa->WelsMallocz (iCountMaxMbNum * sizeof (SMotionTextureUnit), "pVaa->sAdaptiveQuantParam.pMotionTextureUnit"));
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureUnit), FreeMemorySvc (ppCtx))
    (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp   = static_cast<int8_t*>
        (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int8_t), "pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp"));
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp),
                                FreeMemorySvc (ppCtx))
  }

  (*ppCtx)->pVaa->pVaaBackgroundMbFlag = (int8_t*)pMa->WelsMallocz (iCountMaxMbNum * sizeof (int8_t),
                                         "pVaa->pVaaBackgroundMbFlag");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->pVaaBackgroundMbFlag), FreeMemorySvc (ppCtx))

  (*ppCtx)->pVaa->sVaaCalcInfo.pSad8x8 = static_cast<int32_t (*)[4]>
                                         (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (int32_t), "pVaa->sVaaCalcInfo.sad8x8"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSad8x8), FreeMemorySvc (ppCtx))
  (*ppCtx)->pVaa->sVaaCalcInfo.pSsd16x16 = static_cast<int32_t*>
      (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSsd16x16"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSsd16x16), FreeMemorySvc (ppCtx))
  (*ppCtx)->pVaa->sVaaCalcInfo.pSum16x16 = static_cast<int32_t*>
      (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSum16x16"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSum16x16), FreeMemorySvc (ppCtx))
  (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfSquare16x16 = static_cast<int32_t*>
      (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSumOfSquare16x16"));
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfSquare16x16), FreeMemorySvc (ppCtx))

  if ((*ppCtx)->pSvcParam->bEnableBackgroundDetection) { //BGD control
    (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfDiff8x8 = static_cast<int32_t (*)[4]>
        (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSumOfDiff8x8"));
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfDiff8x8), FreeMemorySvc (ppCtx))
    (*ppCtx)->pVaa->sVaaCalcInfo.pMad8x8 = static_cast<uint8_t (*)[4]>
                                           (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (uint8_t), "pVaa->sVaaCalcInfo.pMad8x8"));
    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pMad8x8), FreeMemorySvc (ppCtx))
  }

  //End of pVaa memory allocation

  iResult = InitDqLayers (ppCtx, pExistingParasetList);
  if (iResult) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitDqLayers failed(%d)!", iResult);
    FreeMemorySvc (ppCtx);
    return iResult;
  }

  if (InitMbListD (ppCtx)) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitMbListD failed!");
    FreeMemorySvc (ppCtx);
    return 1;
  }

  int32_t iMvdRange = 0;
  GetMvMvdRange (pParam, (*ppCtx)->iMvRange, iMvdRange);
  const uint32_t kuiMvdInterTableSize   = (iMvdRange << 2); //intepel*4=qpel
  const uint32_t kuiMvdInterTableStride =  1 + (kuiMvdInterTableSize << 1);//qpel_mv_range*2=(+/-);
  const uint32_t kuiMvdCacheAlignedSize = kuiMvdInterTableStride * sizeof (uint16_t);

  (*ppCtx)->iMvdCostTableSize = kuiMvdInterTableSize;
  (*ppCtx)->iMvdCostTableStride = kuiMvdInterTableStride;
  (*ppCtx)->pMvdCostTable = (uint16_t*)pMa->WelsMallocz (52 * kuiMvdCacheAlignedSize, "pMvdCostTable");
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pMvdCostTable), FreeMemorySvc (ppCtx))
  MvdCostInit ((*ppCtx)->pMvdCostTable, kuiMvdInterTableStride);  //should put to a better place?

  if ((*ppCtx)->ppRefPicListExt[0] != NULL && (*ppCtx)->ppRefPicListExt[0]->pRef[0] != NULL)
    (*ppCtx)->pDecPic = (*ppCtx)->ppRefPicListExt[0]->pRef[0];
  else
    (*ppCtx)->pDecPic = NULL; // error here

  (*ppCtx)->pSps = & (*ppCtx)->pSpsArray[0];
  (*ppCtx)->pPps = & (*ppCtx)->pPPSArray[0];

  return 0;
}


/*!
 * \brief   free memory in SVC core encoder
 * \pParam  pEncCtx     sWelsEncCtx*
 * \return  none
 */
void FreeMemorySvc (sWelsEncCtx** ppCtx) {
  if (NULL != *ppCtx) {
    sWelsEncCtx* pCtx = *ppCtx;
    CMemoryAlign* pMa = pCtx->pMemAlign;
    SWelsSvcCodingParam* pParam = pCtx->pSvcParam;
    int32_t ilayer = 0;

    // SStrideTables
    if (NULL != pCtx->pStrideTab) {
      if (NULL != pCtx->pStrideTab->pStrideDecBlockOffset[0][1]) {
        pMa->WelsFree (pCtx->pStrideTab->pStrideDecBlockOffset[0][1], "pBase");
        pCtx->pStrideTab->pStrideDecBlockOffset[0][1] = NULL;
      }
      pMa->WelsFree (pCtx->pStrideTab, "SStrideTables");
      pCtx->pStrideTab = NULL;
    }
    // pDq idc map
    if (NULL != pCtx->pDqIdcMap) {
      pMa->WelsFree (pCtx->pDqIdcMap, "pDqIdcMap");
      pCtx->pDqIdcMap = NULL;
    }

    if (NULL != pCtx->pOut) {
      // bs pBuffer
      if (NULL != pCtx->pOut->pBsBuffer) {
        pMa->WelsFree (pCtx->pOut->pBsBuffer, "pOut->pBsBuffer");
        pCtx->pOut->pBsBuffer = NULL;
      }
      // NALs list
      if (NULL != pCtx->pOut->sNalList) {
        pMa->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList");
        pCtx->pOut->sNalList = NULL;
      }
      // NALs len
      if (NULL != pCtx->pOut->pNalLen) {
        pMa->WelsFree (pCtx->pOut->pNalLen, "pOut->pNalLen");
        pCtx->pOut->pNalLen = NULL;
      }
      pMa->WelsFree (pCtx->pOut, "SWelsEncoderOutput");
      pCtx->pOut = NULL;
    }

    if (pParam != NULL && pParam->iMultipleThreadIdc > 1)
      ReleaseMtResource (ppCtx);

    // frame bitstream pBuffer
    if (NULL != pCtx->pFrameBs) {
      pMa->WelsFree (pCtx->pFrameBs, "pFrameBs");
      pCtx->pFrameBs = NULL;
    }

    // pSpsArray
    if (NULL != pCtx->pSpsArray) {
      pMa->WelsFree (pCtx->pSpsArray, "pSpsArray");
      pCtx->pSpsArray = NULL;
    }
    // pPPSArray
    if (NULL != pCtx->pPPSArray) {
      pMa->WelsFree (pCtx->pPPSArray, "pPPSArray");
      pCtx->pPPSArray = NULL;
    }
    // subset_sps_array
    if (NULL != pCtx->pSubsetArray) {
      pMa->WelsFree (pCtx->pSubsetArray, "pSubsetArray");
      pCtx->pSubsetArray = NULL;
    }

    if (NULL != pCtx->pIntra4x4PredModeBlocks) {
      pMa->WelsFree (pCtx->pIntra4x4PredModeBlocks, "pIntra4x4PredModeBlocks");
      pCtx->pIntra4x4PredModeBlocks = NULL;
    }

    if (NULL != pCtx->pNonZeroCountBlocks) {
      pMa->WelsFree (pCtx->pNonZeroCountBlocks, "pNonZeroCountBlocks");
      pCtx->pNonZeroCountBlocks = NULL;
    }

    if (NULL != pCtx->pMvUnitBlock4x4) {
      pMa->WelsFree (pCtx->pMvUnitBlock4x4, "pMvUnitBlock4x4");
      pCtx->pMvUnitBlock4x4 = NULL;
    }

    if (NULL != pCtx->pRefIndexBlock4x4) {
      pMa->WelsFree (pCtx->pRefIndexBlock4x4, "pRefIndexBlock4x4");
      pCtx->pRefIndexBlock4x4 = NULL;
    }

    if (NULL != pCtx->ppMbListD) {
      if (NULL != pCtx->ppMbListD[0]) {
        pMa->WelsFree (pCtx->ppMbListD[0], "ppMbListD[0]");
        (*ppCtx)->ppMbListD[0] = NULL;
      }
      pMa->WelsFree (pCtx->ppMbListD, "ppMbListD");
      pCtx->ppMbListD = NULL;
    }

    if (NULL != pCtx->pSadCostMb) {
      pMa->WelsFree (pCtx->pSadCostMb, "pSadCostMb");
      pCtx->pSadCostMb = NULL;
    }

    // SLTRState
    if (NULL != pCtx->pLtr) {
      pMa->WelsFree (pCtx->pLtr, "SLTRState");
      pCtx->pLtr = NULL;
    }

    // pDq layers list
    ilayer = 0;
    if (NULL != pCtx->ppDqLayerList && pParam != NULL) {
      while (ilayer < pParam->iSpatialLayerNum) {
        SDqLayer* pDq = pCtx->ppDqLayerList[ilayer];
        SSpatialLayerConfig* pDlp = &pCtx->pSvcParam->sSpatialLayers[ilayer];

        const bool kbIsDynamicSlicing = (SM_SIZELIMITED_SLICE == pDlp->sSliceArgument.uiSliceMode);

        // pDq layers
        if (NULL != pDq) {
          if (NULL != pDq->sLayerInfo.pSliceInLayer) {
            int32_t iSliceIdx = 0;
            int32_t iSliceNum = GetInitialSliceNum (pDq->iMbWidth, pDq->iMbHeight, &pDlp->sSliceArgument);
            if (pDlp->sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE && pCtx->iActiveThreadsNum == 1) {
              if (iSliceNum < pDq->pSliceEncCtx->iMaxSliceNumConstraint) {
                iSliceNum = pDq->pSliceEncCtx->iMaxSliceNumConstraint;
              }
            }
            if (iSliceNum < 1)
              iSliceNum = 1;
            while (iSliceIdx < iSliceNum) {
              SSlice* pSlice = &pDq->sLayerInfo.pSliceInLayer[iSliceIdx];
              FreeMbCache (&pSlice->sMbCacheInfo, pMa);
              ++ iSliceIdx;
            }
            pMa->WelsFree (pDq->sLayerInfo.pSliceInLayer, "pSliceInLayer");
            pDq->sLayerInfo.pSliceInLayer = NULL;
          }
          if (kbIsDynamicSlicing) {
            pMa->WelsFree (pDq->pNumSliceCodedOfPartition, "pNumSliceCodedOfPartition");
            pDq->pNumSliceCodedOfPartition = NULL;
            pMa->WelsFree (pDq->pLastCodedMbIdxOfPartition, "pLastCodedMbIdxOfPartition");
            pDq->pLastCodedMbIdxOfPartition = NULL;
            pMa->WelsFree (pDq->pLastMbIdxOfPartition, "pLastMbIdxOfPartition");
            pDq->pLastMbIdxOfPartition = NULL;
          }

          if (pDq->pFeatureSearchPreparation) {
            ReleaseFeatureSearchPreparation (pMa, pDq->pFeatureSearchPreparation->pFeatureOfBlock);
            pMa->WelsFree (pDq->pFeatureSearchPreparation, "pFeatureSearchPreparation");
            pDq->pFeatureSearchPreparation = NULL;
          }

          pMa->WelsFree (pDq, "pDq");
          pDq = NULL;
          pCtx->ppDqLayerList[ilayer] = NULL;
        }
        ++ ilayer;
      }
      pMa->WelsFree (pCtx->ppDqLayerList, "ppDqLayerList");
      pCtx->ppDqLayerList = NULL;
    }
    // reference picture list extension
    if (NULL != pCtx->ppRefPicListExt && pParam != NULL) {
      ilayer = 0;
      while (ilayer < pParam->iSpatialLayerNum) {
        SRefList* pRefList = pCtx->ppRefPicListExt[ilayer];
        if (NULL != pRefList) {
          int32_t iRef = 0;
          do {
            if (pRefList->pRef[iRef] != NULL) {
              FreePicture (pMa, &pRefList->pRef[iRef]);
            }
            ++ iRef;
          } while (iRef < 1 + pParam->iMaxNumRefFrame);

          pMa->WelsFree (pCtx->ppRefPicListExt[ilayer], "ppRefPicListExt[]");
          pCtx->ppRefPicListExt[ilayer] = NULL;
        }
        ++ ilayer;
      }

      pMa->WelsFree (pCtx->ppRefPicListExt, "ppRefPicListExt");
      pCtx->ppRefPicListExt = NULL;
    }

    // pSlice context list
    if (NULL != pCtx->pSliceCtxList && pParam != NULL) {
      ilayer = 0;
      while (ilayer < pParam->iSpatialLayerNum) {
        SSliceCtx* pSliceCtx = &pCtx->pSliceCtxList[ilayer];
        if (NULL != pSliceCtx)
          UninitSlicePEncCtx (pSliceCtx, pMa);
        ++ ilayer;
      }
      pMa->WelsFree (pCtx->pSliceCtxList, "pSliceCtxList");
      pCtx->pSliceCtxList = NULL;
    }

    // VAA
    if (NULL != pCtx->pVaa) {
      if (pCtx->pSvcParam->bEnableAdaptiveQuant) { //free mem
        pMa->WelsFree (pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureUnit, "pVaa->sAdaptiveQuantParam.pMotionTextureUnit");
        pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureUnit = NULL;
        pMa->WelsFree (pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp,
                       "pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp");
        pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp = NULL;
      }

      pMa->WelsFree (pCtx->pVaa->pVaaBackgroundMbFlag, "pVaa->pVaaBackgroundMbFlag");
      pCtx->pVaa->pVaaBackgroundMbFlag = NULL;
      pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSad8x8, "pVaa->sVaaCalcInfo.sad8x8");
      pCtx->pVaa->sVaaCalcInfo.pSad8x8 = NULL;
      pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSsd16x16, "pVaa->sVaaCalcInfo.pSsd16x16");
      pCtx->pVaa->sVaaCalcInfo.pSsd16x16 = NULL;
      pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSum16x16, "pVaa->sVaaCalcInfo.pSum16x16");
      pCtx->pVaa->sVaaCalcInfo.pSum16x16 = NULL;
      pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSumOfSquare16x16, "pVaa->sVaaCalcInfo.pSumOfSquare16x16");
      pCtx->pVaa->sVaaCalcInfo.pSumOfSquare16x16 = NULL;

      if (pCtx->pSvcParam->bEnableBackgroundDetection) { //BGD control
        pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSumOfDiff8x8, "pVaa->sVaaCalcInfo.pSumOfDiff8x8");
        pCtx->pVaa->sVaaCalcInfo.pSumOfDiff8x8 = NULL;
        pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pMad8x8, "pVaa->sVaaCalcInfo.pMad8x8");
        pCtx->pVaa->sVaaCalcInfo.pMad8x8 = NULL;
      }
      if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME)
        ReleaseMemoryVaaScreen (pCtx->pVaa, pMa, pCtx->pSvcParam->iMaxNumRefFrame);
      pMa->WelsFree (pCtx->pVaa, "pVaa");
      pCtx->pVaa = NULL;
    }

    WelsRcFreeMemory (pCtx);
    // rate control module memory free
    if (NULL != pCtx->pWelsSvcRc) {
      pMa->WelsFree (pCtx->pWelsSvcRc, "pWelsSvcRc");
      pCtx->pWelsSvcRc = NULL;
    }

    /* MVD cost tables for Inter */
    if (NULL != pCtx->pMvdCostTable) {
      pMa->WelsFree (pCtx->pMvdCostTable, "pMvdCostTable");
      pCtx->pMvdCostTable = NULL;
    }

    FreeCodingParam (&pCtx->pSvcParam, pMa);
    if (NULL != pCtx->pFuncList) {
      pMa->WelsFree (pCtx->pFuncList, "SWelsFuncPtrList");
      pCtx->pFuncList = NULL;
    }

#if defined(MEMORY_MONITOR)
    assert (pMa->WelsGetMemoryUsage() == 0); // ensure all memory free well
#endif//MEMORY_MONITOR

    if ((*ppCtx)->pMemAlign != NULL) {
      WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, "FreeMemorySvc(), verify memory usage (%d bytes) after free..",
               (*ppCtx)->pMemAlign->WelsGetMemoryUsage());
      delete (*ppCtx)->pMemAlign;
      (*ppCtx)->pMemAlign = NULL;
    }

    free (*ppCtx);
    *ppCtx = NULL;
  }
}

int32_t InitSliceSettings (SLogContext* pLogCtx, SWelsSvcCodingParam* pCodingParam, const int32_t kiCpuCores,
                           int16_t* pMaxSliceCount) {
  int32_t iSpatialIdx = 0, iSpatialNum = pCodingParam->iSpatialLayerNum;
  uint16_t iMaxSliceCount = 0;

  do {
    SSpatialLayerConfig* pDlp           = &pCodingParam->sSpatialLayers[iSpatialIdx];
    SSliceArgument* pSliceArgument                  = &pDlp->sSliceArgument;
    const int32_t kiMbWidth             = (pDlp->iVideoWidth + 15) >> 4;
    const int32_t kiMbHeight            = (pDlp->iVideoHeight + 15) >> 4;
    const int32_t kiMbNumInFrame        = kiMbWidth * kiMbHeight;
    int32_t iSliceNum                   = (SM_FIXEDSLCNUM_SLICE == pSliceArgument->uiSliceMode && 0==pSliceArgument->uiSliceNum) ? kiCpuCores : pSliceArgument->uiSliceNum;
    // NOTE: Per design, in case MT/DYNAMIC_SLICE_ASSIGN enabled, for SM_FIXEDSLCNUM_SLICE mode,
    // uiSliceNum of current spatial layer settings equals to uiCpuCores number; SM_SIZELIMITED_SLICE mode,
    // uiSliceNum intials as uiCpuCores also, stay tuned dynamically slicing in future
    pSliceArgument->uiSliceNum = iSliceNum;    // used fixed one

    switch (pSliceArgument->uiSliceMode) {
    case SM_SIZELIMITED_SLICE:
      iMaxSliceCount = AVERSLICENUM_CONSTRAINT;
      break; // go through for SM_SIZELIMITED_SLICE?
    case SM_FIXEDSLCNUM_SLICE:
      if (iSliceNum > iMaxSliceCount) {
        iMaxSliceCount = iSliceNum;
      }
      if (0==iSliceNum) {
        //the auto slice num logic
        pDlp->sSliceArgument.uiSliceNum = kiCpuCores;

        if (0==kiCpuCores){
        int32_t uiCpuCores = 0;
        WelsCPUFeatureDetect (&uiCpuCores); // detect cpu capacity features
        pDlp->sSliceArgument.uiSliceNum = uiCpuCores;

        if (uiCpuCores == 1) {
          WelsLog (pLogCtx, WELS_LOG_INFO, "InitSliceSettings(), uiCpuCores = 1, switched to SM_SINGLE_SLICE");
          pDlp->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
          pDlp->sSliceArgument.uiSliceNum = 1;
          pDlp->sSliceArgument.uiSliceSizeConstraint = 0;
          for (int32_t iIdx = 0; iIdx < MAX_SLICES_NUM; iIdx++) {
            pDlp->sSliceArgument.uiSliceMbNum[iIdx] = 0;
          }
          break;
        }
        }

        if (pDlp->sSliceArgument.uiSliceNum == 1) {
          WelsLog (pLogCtx, WELS_LOG_DEBUG,
                   "InitSliceSettings(), uiSliceNum(%d) you set for SM_AUTO_SLICE, now turn to SM_SINGLE_SLICE type!",
                   pDlp->sSliceArgument.uiSliceNum);
          pDlp->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
          break;
        }

        if (pDlp->sSliceArgument.uiSliceNum > MAX_SLICES_NUM) {
          pDlp->sSliceArgument.uiSliceNum = MAX_SLICES_NUM;
        }
        iMaxSliceCount = WELS_MAX(iMaxSliceCount, pDlp->sSliceArgument.uiSliceNum);

        if (pCodingParam->iRCMode != RC_OFF_MODE) { // multiple slices verify with gom
          //check uiSliceNum and set uiSliceMbNum with current uiSliceNum
          if (!GomValidCheckSliceNum (kiMbWidth, kiMbHeight, &pDlp->sSliceArgument.uiSliceNum)) {
            WelsLog (pLogCtx, WELS_LOG_WARNING,
                     "InitSliceSettings(), unsupported setting with Resolution and uiSliceNum combination under RC on! So uiSliceNum is changed to %d!",
                     pDlp->sSliceArgument.uiSliceNum);
          }
          if (pDlp->sSliceArgument.uiSliceNum <= 1 ||
              !GomValidCheckSliceMbNum (kiMbWidth, kiMbHeight, &pDlp->sSliceArgument)) {
            WelsLog (pLogCtx, WELS_LOG_ERROR,
                     "InitSliceSettings(), unsupported setting with Resolution and uiSliceNum (%d) combination  under RC on! Consider setting single slice with this resolution!",
                     pDlp->sSliceArgument.uiSliceNum);
            return ENC_RETURN_INVALIDINPUT;
          }
        } else if (!CheckFixedSliceNumMultiSliceSetting (kiMbNumInFrame,
                                                         &pDlp->sSliceArgument)) {    // verify interleave mode settings
          //check uiSliceMbNum with current uiSliceNum
          WelsLog (pLogCtx, WELS_LOG_ERROR,
                   "InitSliceSettings(), invalid uiSliceMbNum (%d) settings!,now turn to SM_SINGLE_SLICE type",
                   pDlp->sSliceArgument.uiSliceMbNum[0]);
          pDlp->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
          pDlp->sSliceArgument.uiSliceNum = 1;
        }
        // considering the coding efficient and performance, iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting
        if (kiMbNumInFrame <= MIN_NUM_MB_PER_SLICE) {
          pDlp->sSliceArgument.uiSliceMode = SM_SINGLE_SLICE;
          pDlp->sSliceArgument.uiSliceNum = 1;
          break;
        }
      }
      // need perform check due uiSliceNum might change, although has been initialized somewhere outside
      if (pCodingParam->iRCMode != RC_OFF_MODE) {
        GomValidCheckSliceMbNum (kiMbWidth, kiMbHeight, pSliceArgument);
      } else {
        CheckFixedSliceNumMultiSliceSetting (kiMbNumInFrame, pSliceArgument);
      }
      break;
    case SM_SINGLE_SLICE:
      if (iSliceNum > iMaxSliceCount)
        iMaxSliceCount = iSliceNum;
      break;
    case SM_RASTER_SLICE:
      if (iSliceNum > iMaxSliceCount)
        iMaxSliceCount = iSliceNum;
      break;
    default:
      break;
    }

    ++ iSpatialIdx;
  } while (iSpatialIdx < iSpatialNum);

  pCodingParam->iCountThreadsNum = WELS_MIN (kiCpuCores, iMaxSliceCount);
  pCodingParam->iMultipleThreadIdc = pCodingParam->iCountThreadsNum;
  if (pCodingParam->iLoopFilterDisableIdc == 0
      && pCodingParam->iMultipleThreadIdc != 1) // Loop filter requested to be enabled, with threading enabled
    pCodingParam->iLoopFilterDisableIdc =
      2; // Disable loop filter on slice boundaries since that's not allowed with multithreading
  *pMaxSliceCount = iMaxSliceCount;

  return ENC_RETURN_SUCCESS;
}

/*!
 * \brief   log output for cpu features/capabilities
 */
void OutputCpuFeaturesLog (SLogContext* pLogCtx, uint32_t uiCpuFeatureFlags, uint32_t uiCpuCores,
                           int32_t iCacheLineSize) {
  // welstracer output
  WelsLog (pLogCtx, WELS_LOG_INFO, "WELS CPU features/capacities (0x%x) detected: \t"
           "HTT:      %c, "
           "MMX:      %c, "
           "MMXEX:    %c, "
           "SSE:      %c, "
           "SSE2:     %c, "
           "SSE3:     %c, "
           "SSSE3:    %c, "
           "SSE4.1:   %c, "
           "SSE4.2:   %c, "
           "AVX:      %c, "
           "FMA:      %c, "
           "X87-FPU:  %c, "
           "3DNOW:    %c, "
           "3DNOWEX:  %c, "
           "ALTIVEC:  %c, "
           "CMOV:     %c, "
           "MOVBE:    %c, "
           "AES:      %c, "
           "NUMBER OF LOGIC PROCESSORS ON CHIP: %d, "
           "CPU CACHE LINE SIZE (BYTES):        %d",
           uiCpuFeatureFlags,
           (uiCpuFeatureFlags & WELS_CPU_HTT) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_MMX) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_MMXEXT) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_SSE) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_SSE2) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_SSE3) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_SSSE3) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_SSE41) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_SSE42) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_AVX) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_FMA) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_FPU) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_3DNOW) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_3DNOWEXT) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_ALTIVEC) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_CMOV) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_MOVBE) ? 'Y' : 'N',
           (uiCpuFeatureFlags & WELS_CPU_AES) ? 'Y' : 'N',
           uiCpuCores,
           iCacheLineSize);
}

int32_t GetMultipleThreadIdc (SLogContext* pLogCtx, SWelsSvcCodingParam* pCodingParam, int16_t& iSliceNum,
                              int32_t& iCacheLineSize, uint32_t& uiCpuFeatureFlags) {
  // for cpu features detection, Only detect once??
  int32_t uiCpuCores = 0; // number of logic processors on physical processor package, zero logic processors means HTT not supported
  uiCpuFeatureFlags = WelsCPUFeatureDetect (&uiCpuCores); // detect cpu capacity features

#ifdef X86_ASM
  if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_128)
    iCacheLineSize = 128;
  else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_64)
    iCacheLineSize = 64;
  else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_32)
    iCacheLineSize = 32;
  else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_16)
    iCacheLineSize = 16;
  OutputCpuFeaturesLog (pLogCtx, uiCpuFeatureFlags, uiCpuCores, iCacheLineSize);
#else
  iCacheLineSize = 16; // 16 bytes aligned in default
#endif//X86_ASM

  if (pCodingParam->iMultipleThreadIdc > 0)
    uiCpuCores = pCodingParam->iMultipleThreadIdc;
  else {
    if (uiCpuCores ==
        0) { // cpuid not supported or doesn't expose the number of cores, use high level system API as followed to detect number of pysical/logic processor
      uiCpuCores = DynamicDetectCpuCores();
    }// So far so many cpu cores up to MAX_THREADS_NUM mean for server platforms,
    // for client application here it is constrained by maximal to MAX_THREADS_NUM
  }
  uiCpuCores = WELS_CLIP3 (uiCpuCores, 1, MAX_THREADS_NUM);

  if (InitSliceSettings (pLogCtx, pCodingParam, uiCpuCores, &iSliceNum)) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "GetMultipleThreadIdc(), InitSliceSettings failed.");
    return 1;
  }
  return 0;
}

/*!
 * \brief   initialize Wels avc encoder core library
 * \pParam  ppCtx       sWelsEncCtx**
 * \pParam  pParam      SWelsSvcCodingParam*
 * \return  successful - 0; otherwise none 0 for failed
 */
int32_t WelsInitEncoderExt (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam, SLogContext* pLogCtx,
                            SExistingParasetList* pExistingParasetList) {
  sWelsEncCtx* pCtx      = NULL;
  int32_t iRet           = 0;
  int16_t iSliceNum      = 1;    // number of slices used
  int32_t iCacheLineSize = 16;   // on chip cache line size in byte
  uint32_t uiCpuFeatureFlags = 0;
  if (NULL == ppCtx || NULL == pCodingParam) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), NULL == ppCtx(0x%p) or NULL == pCodingParam(0x%p).",
             (void*)ppCtx, (void*)pCodingParam);
    return 1;
  }

  iRet = ParamValidationExt (pLogCtx, pCodingParam);
  if (iRet != 0) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), ParamValidationExt failed return %d.", iRet);
    return iRet;
  }
  iRet = pCodingParam->DetermineTemporalSettings();
  if (iRet != ENC_RETURN_SUCCESS) {
    WelsLog (pLogCtx, WELS_LOG_ERROR,
             "WelsInitEncoderExt(), DetermineTemporalSettings failed return %d (check in/out frame rate and temporal layer setting! -- in/out = 2^x, x <= temppral_layer_num)",
             iRet);
    return iRet;
  }
  iRet = GetMultipleThreadIdc (pLogCtx, pCodingParam, iSliceNum, iCacheLineSize, uiCpuFeatureFlags);
  if (iRet != 0) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), GetMultipleThreadIdc failed return %d.", iRet);
    return iRet;
  }


  *ppCtx = NULL;

  pCtx = static_cast<sWelsEncCtx*> (malloc (sizeof (sWelsEncCtx)));

  WELS_VERIFY_RETURN_IF (1, (NULL == pCtx))
  memset (pCtx, 0, sizeof (sWelsEncCtx));

  pCtx->sLogCtx = *pLogCtx;

  pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize);
  WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pCtx->pMemAlign), FreeMemorySvc (&pCtx))

  iRet = AllocCodingParam (&pCtx->pSvcParam, pCtx->pMemAlign);
  if (iRet != 0) {
    FreeMemorySvc (&pCtx);
    return iRet;
  }
  memcpy (pCtx->pSvcParam, pCodingParam, sizeof (SWelsSvcCodingParam)); // confirmed_safe_unsafe_usage

  pCtx->pFuncList = (SWelsFuncPtrList*)pCtx->pMemAlign->WelsMalloc (sizeof (SWelsFuncPtrList), "SWelsFuncPtrList");
  if (NULL == pCtx->pFuncList) {
    FreeMemorySvc (&pCtx);
    return 1;
  }
  InitFunctionPointers (pCtx, pCtx->pSvcParam, uiCpuFeatureFlags);

  pCtx->iActiveThreadsNum = pCodingParam->iCountThreadsNum;
  pCtx->iMaxSliceCount = iSliceNum;
  iRet = RequestMemorySvc (&pCtx, pExistingParasetList);
  if (iRet != 0) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), RequestMemorySvc failed return %d.", iRet);
    FreeMemorySvc (&pCtx);
    return iRet;
  }

  if (pCodingParam->iMultipleThreadIdc > 1) {
    iRet = CreateSliceThreads (pCtx);
    if (iRet != 0) {
      WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), CreateSliceThreads failed return %d.", iRet);
      FreeMemorySvc (&pCtx);
      return iRet;
    }
  }
  if (pCodingParam->iEntropyCodingModeFlag)
    WelsCabacInit (pCtx);
  WelsRcInitModule (pCtx,  pCtx->pSvcParam->iRCMode);

  pCtx->pVpp = new CWelsPreProcess (pCtx);
  if (pCtx->pVpp == NULL) {
    iRet = 1;
    WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), pOut of memory in case new CWelsPreProcess().");
    FreeMemorySvc (&pCtx);
    return iRet;
  }
  if ((iRet = pCtx->pVpp->AllocSpatialPictures (pCtx, pCtx->pSvcParam)) != 0) {
    WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), pVPP alloc spatial pictures failed");
    FreeMemorySvc (&pCtx);
    return iRet;
  }

#if defined(MEMORY_MONITOR)
  WelsLog (pLogCtx, WELS_LOG_INFO, "WelsInitEncoderExt() exit, overall memory usage: %llu bytes",
           static_cast<unsigned long long> (sizeof (sWelsEncCtx) /* requested size from malloc() or new operator */
               + pCtx->pMemAlign->WelsGetMemoryUsage())  /* requested size from CMemoryAlign::WelsMalloc() */
          );
#endif//MEMORY_MONITOR

  pCtx->iStatisticsLogInterval = STATISTICS_LOG_INTERVAL_MS;

  *ppCtx = pCtx;

  WelsLog (pLogCtx, WELS_LOG_DEBUG, "WelsInitEncoderExt(), pCtx= 0x%p.", (void*)pCtx);

  return 0;
}
/*
 *
 * status information output
 */
#if defined(STAT_OUTPUT)
void StatOverallEncodingExt (sWelsEncCtx* pCtx) {
  int8_t i = 0;
  int8_t j = 0;
  for (i = 0; i < pCtx->pSvcParam->iSpatialLayerNum; i++) {
    fprintf (stdout, "\nDependency layer : %d\n", i);
    fprintf (stdout, "Quality layer : %d\n", j);
    {
      const int32_t iCount = pCtx->sStatData[i][j].sSliceData.iSliceCount[I_SLICE] +
                             pCtx->sStatData[i][j].sSliceData.iSliceCount[P_SLICE] +
                             pCtx->sStatData[i][j].sSliceData.iSliceCount[B_SLICE];
#if defined(MB_TYPES_CHECK)
      if (iCount > 0) {
        int32_t iCountNumIMb = pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra4x4] +
                               pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra16x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][7];
        int32_t iCountNumPMb =  pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra4x4] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra16x16] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][7] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][PSkip];
        int32_t count_p_mbL0 =  pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] +
                                pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10];

        int32_t iMbCount = iCountNumIMb + iCountNumPMb;
        if (iMbCount > 0) {
          fprintf (stderr,
                   "SVC: overall Slices MBs: %d Avg\nI4x4: %.3f%% I16x16: %.3f%% IBL: %.3f%%\nP16x16: %.3f%% P16x8: %.3f%% P8x16: %.3f%% P8x8: %.3f%% SUBP8x8: %.3f%% PSKIP: %.3f%%\nILP(All): %.3f%% ILP(PL0): %.3f%% BLSKIP(PL0): %.3f%% RP(PL0): %.3f%%\n",
                   iMbCount,
                   (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra4x4] +
                              pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra4x4]) / iMbCount),
                   (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra16x16] +
                              pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra16x16]) / iMbCount),
                   (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][7] +
                              pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][7]) / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10] / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][PSkip] / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][11] / iMbCount),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][11] / count_p_mbL0),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][8] / count_p_mbL0),
                   (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][9] / count_p_mbL0)
                  );
        }
      }
#endif //#if defined(MB_TYPES_CHECK)

      if (iCount > 0) {
        fprintf (stdout, "SVC: overall PSNR Y: %2.3f U: %2.3f V: %2.3f kb/s: %.1f fps: %.3f\n\n",
                 (pCtx->sStatData[i][j].sQualityStat.rYPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rYPsnr[P_SLICE] +
                  pCtx->sStatData[i][j].sQualityStat.rYPsnr[B_SLICE]) / (float) (iCount),
                 (pCtx->sStatData[i][j].sQualityStat.rUPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rUPsnr[P_SLICE] +
                  pCtx->sStatData[i][j].sQualityStat.rUPsnr[B_SLICE]) / (float) (iCount),
                 (pCtx->sStatData[i][j].sQualityStat.rVPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rVPsnr[P_SLICE] +
                  pCtx->sStatData[i][j].sQualityStat.rVPsnr[B_SLICE]) / (float) (iCount),
                 1.0f * pCtx->pSvcParam->sDependencyLayers[i].fOutputFrameRate * (pCtx->sStatData[i][j].sSliceData.iSliceSize[I_SLICE] +
                     pCtx->sStatData[i][j].sSliceData.iSliceSize[P_SLICE] + pCtx->sStatData[i][j].sSliceData.iSliceSize[B_SLICE]) / (float) (
                   iCount + pCtx->pWelsSvcRc[i].iSkipFrameNum) / 1000,
                 1.0f * pCtx->pSvcParam->sDependencyLayers[i].fOutputFrameRate);

      }

    }

  }
}
#endif
/*!
 * \brief   uninitialize Wels encoder core library
 * \pParam  pEncCtx     sWelsEncCtx*
 * \return  none
 */
void WelsUninitEncoderExt (sWelsEncCtx** ppCtx) {
  if (NULL == ppCtx || NULL == *ppCtx)
    return;

  WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO,
           "WelsUninitEncoderExt(), pCtx= %p, iThreadCount= %d, iMultipleThreadIdc= %d.",
           (void*) (*ppCtx), (*ppCtx)->pSvcParam->iCountThreadsNum, (*ppCtx)->pSvcParam->iMultipleThreadIdc);

#if defined(STAT_OUTPUT)
  StatOverallEncodingExt (*ppCtx);
#endif

  if ((*ppCtx)->pSvcParam->iMultipleThreadIdc > 1 && (*ppCtx)->pSliceThreading != NULL) {
    const int32_t iThreadCount = (*ppCtx)->pSvcParam->iCountThreadsNum;
    int32_t iThreadIdx = 0;

    while (iThreadIdx < iThreadCount) {
      int res = 0;
      if ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]) {
        WelsEventSignal (& (*ppCtx)->pSliceThreading->pExitEncodeEvent[iThreadIdx]);
        WelsEventSignal (& (*ppCtx)->pSliceThreading->pThreadMasterEvent[iThreadIdx]);
        res = WelsThreadJoin ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]); // waiting thread exit
        WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pthread_join(pThreadHandles%d) return %d..",
                 iThreadIdx,
                 res);
        (*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] = 0;
      }
      ++ iThreadIdx;
    }
  }

  if ((*ppCtx)->pVpp) {
    (*ppCtx)->pVpp->FreeSpatialPictures (*ppCtx);
    delete (*ppCtx)->pVpp;
    (*ppCtx)->pVpp = NULL;
  }
  FreeMemorySvc (ppCtx);
  *ppCtx = NULL;
}

/*!
 * \brief   get temporal level due to configuration and coding context
 */
int32_t GetTemporalLevel (SSpatialLayerInternal* fDlp, const int32_t kiFrameNum, const int32_t kiGopSize) {
  const int32_t kiCodingIdx = kiFrameNum & (kiGopSize - 1);

  return fDlp->uiCodingIdx2TemporalId[kiCodingIdx];
}

void DynslcUpdateMbNeighbourInfoListForAllSlices (SSliceCtx* pSliceCtx, SMB* pMbList) {
  const int32_t kiMbWidth       = pSliceCtx->iMbWidth;
  const int32_t kiEndMbInSlice  = pSliceCtx->iMbNumInFrame - 1;
  int32_t  iIdx                 = 0;

  do {
    SMB* pMb = &pMbList[iIdx];
    uint32_t uiNeighborAvailFlag        = 0;
    const int32_t kiMbXY                = pMb->iMbXY;
    const int32_t kiMbX                 = pMb->iMbX;
    const int32_t kiMbY                 = pMb->iMbY;
    bool     bLeft;
    bool     bTop;
    bool     bLeftTop;
    bool     bRightTop;
    uint16_t  uiSliceIdc;
    int32_t   iLeftXY, iTopXY, iLeftTopXY, iRightTopXY;

    uiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY);
    pMb->uiSliceIdc = uiSliceIdc;
    iLeftXY = kiMbXY - 1;
    iTopXY = kiMbXY - kiMbWidth;
    iLeftTopXY = iTopXY - 1;
    iRightTopXY = iTopXY + 1;

    bLeft = (kiMbX > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iLeftXY));
    bTop = (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iTopXY));
    bLeftTop = (kiMbX > 0) && (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iLeftTopXY));
    bRightTop = (kiMbX < (kiMbWidth - 1)) && (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iRightTopXY));

    if (bLeft) {
      uiNeighborAvailFlag |= LEFT_MB_POS;
    }
    if (bTop) {
      uiNeighborAvailFlag |= TOP_MB_POS;
    }
    if (bLeftTop) {
      uiNeighborAvailFlag |= TOPLEFT_MB_POS;
    }
    if (bRightTop) {
      uiNeighborAvailFlag |= TOPRIGHT_MB_POS;
    }
    pMb->uiNeighborAvail = (uint8_t)uiNeighborAvailFlag;

    ++ iIdx;
  } while (iIdx <= kiEndMbInSlice);
}

/*
 * TUNE back if number of picture partition decision algorithm based on past if available
 */
int32_t PicPartitionNumDecision (sWelsEncCtx* pCtx) {
  int32_t iPartitionNum = 1;
  if (pCtx->pSvcParam->iMultipleThreadIdc > 1) {
    iPartitionNum = pCtx->pSvcParam->iCountThreadsNum;
  }
  return iPartitionNum;
}

void WelsInitCurrentQBLayerMltslc (sWelsEncCtx* pCtx) {
  //pData init
  SDqLayer*  pCurDq    = pCtx->pCurDqLayer;
  SSliceCtx* pSliceCtx = (pCurDq->pSliceEncCtx);

  //mb_neighbor
  DynslcUpdateMbNeighbourInfoListForAllSlices (pSliceCtx, pCurDq->sMbDataP);
}

void UpdateSlicepEncCtxWithPartition (SSliceCtx* pSliceCtx, int32_t iPartitionNum) {
  const int32_t kiMbNumInFrame          = pSliceCtx->iMbNumInFrame;
  int32_t iCountMbNumPerPartition       = kiMbNumInFrame;
  int32_t iAssignableMbLeft             = kiMbNumInFrame;
  int32_t iFirstMbIdx                   = 0;
  int32_t i/*, j*/;

  if (iPartitionNum <= 0)
    iPartitionNum = 1;
  else if (iPartitionNum > AVERSLICENUM_CONSTRAINT)
    iPartitionNum = AVERSLICENUM_CONSTRAINT; // AVERSLICENUM_CONSTRAINT might be variable, however not fixed by MACRO
  iCountMbNumPerPartition /= iPartitionNum;
  pSliceCtx->iSliceNumInFrame = iPartitionNum;
  i = 0;
  while (i < iPartitionNum) {
    if (i + 1 == iPartitionNum) {
      pSliceCtx->pCountMbNumInSlice[i] = iAssignableMbLeft;
    } else {
      pSliceCtx->pCountMbNumInSlice[i] = iCountMbNumPerPartition;
    }
    pSliceCtx->pFirstMbInSlice[i] = iFirstMbIdx;

    WelsSetMemMultiplebytes_c (pSliceCtx->pOverallMbMap + iFirstMbIdx, i,
                               pSliceCtx->pCountMbNumInSlice[i], sizeof (uint16_t));

    // for next partition(or pSlice)
    iFirstMbIdx += pSliceCtx->pCountMbNumInSlice[i];
    iAssignableMbLeft -= pSliceCtx->pCountMbNumInSlice[i];
    ++ i;
  }
}

void WelsInitCurrentDlayerMltslc (sWelsEncCtx* pCtx, int32_t iPartitionNum) {
  SDqLayer* pCurDq      = pCtx->pCurDqLayer;
  SSliceCtx* pSliceCtx  = pCurDq->pSliceEncCtx;

  UpdateSlicepEncCtxWithPartition (pSliceCtx, iPartitionNum);

  if (I_SLICE == pCtx->eSliceType) { //check if uiSliceSizeConstraint too small
#define byte_complexIMBat26 (60)
    uint8_t iCurDid    = pCtx->uiDependencyId;
    uint32_t uiFrmByte = 0;

    if (pCtx->pSvcParam->iRCMode != RC_OFF_MODE) {
      //RC case
      uiFrmByte = (
                    ((uint32_t) (pCtx->pSvcParam->sSpatialLayers[iCurDid].iSpatialBitrate)
                     / (uint32_t) (pCtx->pSvcParam->sDependencyLayers[iCurDid].fInputFrameRate)) >> 3);
    } else {
      //fixed QP case
      const int32_t iTtlMbNumInFrame = pSliceCtx->iMbNumInFrame;
      int32_t iQDeltaTo26 = (26 - pCtx->pSvcParam->sSpatialLayers[iCurDid].iDLayerQp);

      uiFrmByte = (iTtlMbNumInFrame * byte_complexIMBat26);
      if (iQDeltaTo26 > 0) {
        //smaller QP than 26
        uiFrmByte = (uint32_t) (uiFrmByte * ((float)iQDeltaTo26 / 4));
      } else if (iQDeltaTo26 < 0) {
        //larger QP than 26
        iQDeltaTo26 = ((-iQDeltaTo26) >> 2);   //delta mod 4
        uiFrmByte = (uiFrmByte >> (iQDeltaTo26));   //if delta 4, byte /2
      }
    }

    //MINPACKETSIZE_CONSTRAINT
    if (pSliceCtx->uiSliceSizeConstraint
        <
        (uint32_t) (uiFrmByte//suppose 16 byte per mb at average
                    / (pSliceCtx->iMaxSliceNumConstraint))
       ) {

      WelsLog (& (pCtx->sLogCtx),
               WELS_LOG_WARNING,
               "Set-SliceConstraint(%d) too small for current resolution (MB# %d) under QP/BR!",
               pSliceCtx->uiSliceSizeConstraint,
               pSliceCtx->iMbNumInFrame
              );
    }
  }

  WelsInitCurrentQBLayerMltslc (pCtx);
}

/*!
 * \brief   initialize current layer
 */
void WelsInitCurrentLayer (sWelsEncCtx* pCtx,
                           const int32_t kiWidth,
                           const int32_t kiHeight) {
  SWelsSvcCodingParam* pParam   = pCtx->pSvcParam;
  SPicture* pEncPic             = pCtx->pEncPic;
  SPicture* pDecPic             = pCtx->pDecPic;
  SDqLayer* pCurDq              = pCtx->pCurDqLayer;
  SSlice* pBaseSlice            = &pCurDq->sLayerInfo.pSliceInLayer[0];
  SSlice* pSlice                = NULL;
  const uint8_t kiCurDid        = pCtx->uiDependencyId;
  const bool kbUseSubsetSpsFlag = (!pParam->bSimulcastAVC) && (kiCurDid > BASE_DEPENDENCY_ID);
  SSpatialLayerConfig* fDlp     = &pParam->sSpatialLayers[kiCurDid];
  SNalUnitHeaderExt* pNalHdExt  = &pCurDq->sLayerInfo.sNalHeaderExt;
  SNalUnitHeader* pNalHd        = &pNalHdExt->sNalUnitHeader;
  SDqIdc* pDqIdc                = &pCtx->pDqIdcMap[kiCurDid];
  int32_t iIdx                  = 0;
  int32_t iSliceCount           = 0;

  if (NULL == pCurDq)
    return;

  pCurDq->pDecPic = pDecPic;

  if (fDlp->sSliceArgument.uiSliceMode == SM_SIZELIMITED_SLICE) // need get extra slices for update
    iSliceCount = GetInitialSliceNum (pCurDq->iMbWidth, pCurDq->iMbHeight, &fDlp->sSliceArgument);
  else
    iSliceCount = GetCurrentSliceNum (pCurDq->pSliceEncCtx);
  assert (iSliceCount > 0);

  int32_t iCurPpsId = pDqIdc->iPpsId;
  int32_t iCurSpsId = pDqIdc->iSpsId;

  if (SPS_PPS_LISTING == pParam->eSpsPpsIdStrategy) {
    iCurPpsId = pCtx->sPSOVector.iPpsIdList[pDqIdc->iPpsId][WELS_ABS (pCtx->uiIdrPicId - 1) % MAX_PPS_COUNT];
  }

  pBaseSlice->sSliceHeaderExt.sSliceHeader.iPpsId       = iCurPpsId;
  pCurDq->sLayerInfo.pPpsP                              =
    pBaseSlice->sSliceHeaderExt.sSliceHeader.pPps       = &pCtx->pPPSArray[iCurPpsId];

  pBaseSlice->sSliceHeaderExt.sSliceHeader.iSpsId       = iCurSpsId;
  if (kbUseSubsetSpsFlag) {
    pCurDq->sLayerInfo.pSubsetSpsP                      = &pCtx->pSubsetArray[iCurSpsId];
    pCurDq->sLayerInfo.pSpsP                            =
      pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps     = &pCurDq->sLayerInfo.pSubsetSpsP->pSps;
  } else {
    pCurDq->sLayerInfo.pSubsetSpsP                      = NULL;
    pCurDq->sLayerInfo.pSpsP                            =
      pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps     = &pCtx->pSpsArray[iCurSpsId];
  }

  pBaseSlice->bSliceHeaderExtFlag = (NAL_UNIT_CODED_SLICE_EXT == pCtx->eNalType);

  pSlice = pBaseSlice;
  iIdx = 1;
  while (iIdx < iSliceCount) {
    ++ pSlice;
    pSlice->sSliceHeaderExt.sSliceHeader.iPpsId = pBaseSlice->sSliceHeaderExt.sSliceHeader.iPpsId;
    pSlice->sSliceHeaderExt.sSliceHeader.pPps   = pBaseSlice->sSliceHeaderExt.sSliceHeader.pPps;
    pSlice->sSliceHeaderExt.sSliceHeader.iSpsId = pBaseSlice->sSliceHeaderExt.sSliceHeader.iSpsId;
    pSlice->sSliceHeaderExt.sSliceHeader.pSps   = pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps;
    pSlice->bSliceHeaderExtFlag                 = pBaseSlice->bSliceHeaderExtFlag;
    ++ iIdx;
  }

  memset (pNalHdExt, 0, sizeof (SNalUnitHeaderExt));
  pNalHd->uiNalRefIdc                   = pCtx->eNalPriority;
  pNalHd->eNalUnitType                  = pCtx->eNalType;

  pNalHdExt->uiDependencyId             = kiCurDid;
  pNalHdExt->bDiscardableFlag           = (pCtx->bNeedPrefixNalFlag) ? (pNalHd->uiNalRefIdc == NRI_PRI_LOWEST) : false;
  pNalHdExt->bIdrFlag                   = (pCtx->iFrameNum == 0) && ((pCtx->eNalType == NAL_UNIT_CODED_SLICE_IDR)
                                          || (pCtx->eSliceType == I_SLICE));
  pNalHdExt->uiTemporalId               = pCtx->uiTemporalId;

  // pEncPic pData
  pCurDq->pEncData[0]   = pEncPic->pData[0];
  pCurDq->pEncData[1]   = pEncPic->pData[1];
  pCurDq->pEncData[2]   = pEncPic->pData[2];
  pCurDq->iEncStride[0] = pEncPic->iLineSize[0];
  pCurDq->iEncStride[1] = pEncPic->iLineSize[1];
  pCurDq->iEncStride[2] = pEncPic->iLineSize[2];
  // cs pData
  pCurDq->pCsData[0]    = pDecPic->pData[0];
  pCurDq->pCsData[1]    = pDecPic->pData[1];
  pCurDq->pCsData[2]    = pDecPic->pData[2];
  pCurDq->iCsStride[0]  = pDecPic->iLineSize[0];
  pCurDq->iCsStride[1]  = pDecPic->iLineSize[1];
  pCurDq->iCsStride[2]  = pDecPic->iLineSize[2];

  if (pCurDq->pRefLayer != NULL) {
    pCurDq->bBaseLayerAvailableFlag = true;
  } else {
    pCurDq->bBaseLayerAvailableFlag = false;
  }

  if (pCtx->pTaskManage) {
    pCtx->pTaskManage->InitFrame(kiCurDid);
  }
}

static inline void SetFastCodingFunc (SWelsFuncPtrList* pFuncList) {
  pFuncList->pfIntraFineMd = WelsMdIntraFinePartitionVaa;
  pFuncList->sSampleDealingFuncs.pfMdCost = pFuncList->sSampleDealingFuncs.pfSampleSad;
  pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad;
  pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad;
}
static inline void SetNormalCodingFunc (SWelsFuncPtrList* pFuncList) {
  pFuncList->pfIntraFineMd = WelsMdIntraFinePartition;
  pFuncList->sSampleDealingFuncs.pfMdCost = pFuncList->sSampleDealingFuncs.pfSampleSatd;
  pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 =
    pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd;
  pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 =
    pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd;
  pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3 =
    pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd;
}
bool SetMeMethod (const uint8_t uiMethod, PSearchMethodFunc& pSearchMethodFunc) {
  switch (uiMethod) {
  case  ME_DIA:
    pSearchMethodFunc  = WelsDiamondSearch;
    break;
  case  ME_CROSS:
    pSearchMethodFunc = WelsMotionCrossSearch;
    break;
  case  ME_DIA_CROSS:
    pSearchMethodFunc = WelsDiamondCrossSearch;
    break;
  case  ME_DIA_CROSS_FME:
    pSearchMethodFunc = WelsDiamondCrossFeatureSearch;
    break;
  case ME_FULL:
    pSearchMethodFunc = WelsDiamondSearch;
    return false;
  default:
    pSearchMethodFunc = WelsDiamondSearch;
    return false;
  }
  return true;
}



void PreprocessSliceCoding (sWelsEncCtx* pCtx) {
  SDqLayer* pCurLayer           = pCtx->pCurDqLayer;
  //const bool kbBaseAvail      = pCurLayer->bBaseLayerAvailableFlag;
  const bool kbHighestSpatialLayer =
    (pCtx->pSvcParam->iSpatialLayerNum == (pCurLayer->sLayerInfo.sNalHeaderExt.uiDependencyId + 1));
  SWelsFuncPtrList* pFuncList = pCtx->pFuncList;
  SLogContext* pLogCtx = & (pCtx->sLogCtx);
  /* function pointers conditional assignment under sWelsEncCtx, layer_mb_enc_rec (in stack) is exclusive */
  if ((pCtx->pSvcParam->iUsageType == CAMERA_VIDEO_REAL_TIME && kbHighestSpatialLayer) ||
      (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME && P_SLICE == pCtx->eSliceType
       && kbHighestSpatialLayer) //TODO: here is for sync with the origin code, consider the design again with more tests
     ) {
    SetFastCodingFunc (pFuncList);
  } else {
    SetNormalCodingFunc (pFuncList);
  }

  if (P_SLICE == pCtx->eSliceType) {
    for (int i = 0; i < BLOCK_STATIC_IDC_ALL; i++) {
      pFuncList->pfMotionSearch[i] = WelsMotionEstimateSearch;
    }
    pFuncList->pfSearchMethod[BLOCK_16x16]  =
      pFuncList->pfSearchMethod[BLOCK_16x8] =
        pFuncList->pfSearchMethod[BLOCK_8x16] =
          pFuncList->pfSearchMethod[BLOCK_8x8] =
            pFuncList->pfSearchMethod[BLOCK_4x4] =
              pFuncList->pfSearchMethod[BLOCK_8x4] =
                pFuncList->pfSearchMethod[BLOCK_4x8] = WelsDiamondSearch;
    pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode;
    pFuncList->sSampleDealingFuncs.pfMeCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd;
    pFuncList->pfSetScrollingMv = SetScrollingMvToMdNull;

    if (kbHighestSpatialLayer) {
      pFuncList->pfCalculateSatd = NotCalculateSatdCost;
      pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaa;
    } else {
      pFuncList->pfCalculateSatd = CalculateSatdCost;
      pFuncList->pfInterFineMd = WelsMdInterFinePartition;
    }
  } else {
    pFuncList->sSampleDealingFuncs.pfMeCost = NULL;
  }

  //to init at each frame will be needed when dealing with hybrid content (camera+screen)
  if (pCtx->pSvcParam->iUsageType == SCREEN_CONTENT_REAL_TIME) {
    if (P_SLICE == pCtx->eSliceType) {
      //MD related func pointers
      pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaaOnScreen;

      //ME related func pointers
      SVAAFrameInfoExt* pVaaExt = static_cast<SVAAFrameInfoExt*> (pCtx->pVaa);
      if (pVaaExt->sScrollDetectInfo.bScrollDetectFlag
          && (pVaaExt->sScrollDetectInfo.iScrollMvX | pVaaExt->sScrollDetectInfo.iScrollMvY)) {
        pFuncList->pfSetScrollingMv = SetScrollingMvToMd;
      } else {
        pFuncList->pfSetScrollingMv = SetScrollingMvToMdNull;
      }

      pFuncList->pfMotionSearch[NO_STATIC] = WelsMotionEstimateSearch;
      pFuncList->pfMotionSearch[COLLOCATED_STATIC] = WelsMotionEstimateSearchStatic;
      pFuncList->pfMotionSearch[SCROLLED_STATIC] = WelsMotionEstimateSearchScrolled;
      //ME16x16
      if (!SetMeMethod (ME_DIA_CROSS, pFuncList->pfSearchMethod[BLOCK_16x16])) {
        WelsLog (pLogCtx, WELS_LOG_WARNING, "SetMeMethod(BLOCK_16x16) ME_DIA_CROSS unsuccessful, switched to default search");
      }
      //ME8x8
      SFeatureSearchPreparation* pFeatureSearchPreparation = pCurLayer->pFeatureSearchPreparation;
      if (pFeatureSearchPreparation) {
        pFeatureSearchPreparation->iHighFreMbCount = 0;

        //calculate bFMESwitchFlag
        SVAAFrameInfoExt* pVaaExt = static_cast<SVAAFrameInfoExt*> (pCtx->pVaa);
        const int32_t kiMbSize = pCurLayer->iMbHeight * pCurLayer->iMbWidth;
        pFeatureSearchPreparation->bFMESwitchFlag = CalcFMESwitchFlag (pFeatureSearchPreparation->uiFMEGoodFrameCount,
            pFeatureSearchPreparation->iHighFreMbCount * 100 / kiMbSize, pCtx->pVaa->sVaaCalcInfo.iFrameSad / kiMbSize,
            pVaaExt->sScrollDetectInfo.bScrollDetectFlag);

        //PerformFMEPreprocess
        SScreenBlockFeatureStorage* pScreenBlockFeatureStorage = pCurLayer->pRefPic->pScreenBlockFeatureStorage;
        pFeatureSearchPreparation->pRefBlockFeature = pScreenBlockFeatureStorage;
        if (pFeatureSearchPreparation->bFMESwitchFlag
            && !pScreenBlockFeatureStorage->bRefBlockFeatureCalculated) {
          SPicture* pRef = (pCtx->pSvcParam->bEnableLongTermReference ? pCurLayer->pRefOri[0] : pCurLayer->pRefPic);
          PerformFMEPreprocess (pFuncList, pRef, pFeatureSearchPreparation->pFeatureOfBlock,
                                pScreenBlockFeatureStorage);
        }

        //assign ME pointer
        if (pFeatureSearchPreparation->bFMESwitchFlag && pScreenBlockFeatureStorage->bRefBlockFeatureCalculated
            && (!pScreenBlockFeatureStorage->iIs16x16)) {
          if (!SetMeMethod (ME_DIA_CROSS_FME, pFuncList->pfSearchMethod[BLOCK_8x8])) {
            WelsLog (pLogCtx, WELS_LOG_WARNING,
                     "SetMeMethod(BLOCK_8x8) ME_DIA_CROSS_FME unsuccessful, switched to default search");
          }
        }

        //assign UpdateFMESwitch pointer
        if (pFeatureSearchPreparation->bFMESwitchFlag) {
          pFuncList->pfUpdateFMESwitch = UpdateFMESwitch;
        } else {
          pFuncList->pfUpdateFMESwitch = UpdateFMESwitchNull;
        }
      }//if (pFeatureSearchPreparation)
    } else {
      //reset some status when at I_SLICE
      pCurLayer->pFeatureSearchPreparation->bFMESwitchFlag = true;
      pCurLayer->pFeatureSearchPreparation->uiFMEGoodFrameCount = FMESWITCH_DEFAULT_GOODFRAME_NUM;
    }
  }

  // update some layer dependent variable to save judgements in mb-level
  pCurLayer->bSatdInMdFlag = ((pFuncList->sSampleDealingFuncs.pfMeCost == pFuncList->sSampleDealingFuncs.pfSampleSatd)
                              && (pFuncList->sSampleDealingFuncs.pfMdCost == pFuncList->sSampleDealingFuncs.pfSampleSatd));

  const int32_t kiCurDid            = pCtx->uiDependencyId;
  const int32_t kiCurTid            = pCtx->uiTemporalId;
  if (pCurLayer->bDeblockingParallelFlag && (pCurLayer->iLoopFilterDisableIdc != 1)
#if !defined(ENABLE_FRAME_DUMP)
      && ( NRI_PRI_LOWEST != pCtx->eNalPriority )
      && (pCtx->pSvcParam->sDependencyLayers[kiCurDid].iHighestTemporalId == 0
          || kiCurTid < pCtx->pSvcParam->sDependencyLayers[kiCurDid].iHighestTemporalId)
#endif// !ENABLE_FRAME_DUMP
     ) {
    pFuncList->pfDeblocking.pfDeblockingFilterSlice = DeblockingFilterSliceAvcbase;
  } else {
    pFuncList->pfDeblocking.pfDeblockingFilterSlice = DeblockingFilterSliceAvcbaseNull;
  }
}

/*!
 * \brief   swap pDq layers between current pDq layer and reference pDq layer
 */

static inline void WelsSwapDqLayers (sWelsEncCtx* pCtx, const int32_t kiNextDqIdx) {
  // swap and assign reference
  SDqLayer* pTmpLayer           = pCtx->ppDqLayerList[kiNextDqIdx];
  SDqLayer* pRefLayer           = pCtx->pCurDqLayer;
  pCtx->pCurDqLayer             = pTmpLayer;
  pCtx->pCurDqLayer->pRefLayer  = pRefLayer;
}

/*!
 * \brief   prefetch reference picture after WelsBuildRefList
 */
static inline void PrefetchReferencePicture (sWelsEncCtx* pCtx, const EVideoFrameType keFrameType) {
  SSlice* pSliceBase = &pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[0];
  const int32_t kiSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer->pSliceEncCtx);
  int32_t iIdx = 0;
  uint8_t uiRefIdx = -1;

  assert (kiSliceCount > 0);
  if (keFrameType != videoFrameTypeIDR) {
    assert (pCtx->iNumRef0 > 0);
    pCtx->pRefPic               = pCtx->pRefList0[0];   // always get item 0 due to reordering done
    pCtx->pCurDqLayer->pRefPic  = pCtx->pRefPic;
    uiRefIdx                    = 0;                    // reordered reference iIndex
  } else { // safe for IDR coding
    pCtx->pRefPic               = NULL;
    pCtx->pCurDqLayer->pRefPic  = NULL;
  }

  iIdx = 0;
  while (iIdx < kiSliceCount) {
    pSliceBase->sSliceHeaderExt.sSliceHeader.uiRefIndex = uiRefIdx;
    ++ pSliceBase;
    ++ iIdx;
  }
}


void ParasetIdAdditionIdAdjust (SParaSetOffsetVariable* sParaSetOffsetVariable, const int32_t kiCurEncoderParaSetId,
                                const uint32_t kuiMaxIdInBs) { //paraset_type = 0: SPS; =1: PPS
  //SPS_ID in avc_sps and pSubsetSps will be different using this
  //SPS_ID case example:
  //1st enter:  next_spsid_in_bs == 0; spsid == 0; delta==0;            //actual spsid_in_bs == 0
  //1st finish: next_spsid_in_bs == 1;
  //2nd enter:  next_spsid_in_bs == 1; spsid == 0; delta==1;            //actual spsid_in_bs == 1
  //2nd finish: next_spsid_in_bs == 2;
  //31st enter: next_spsid_in_bs == 31; spsid == 0~2; delta==31~29;     //actual spsid_in_bs == 31
  //31st finish:next_spsid_in_bs == 0;
  //31st enter: next_spsid_in_bs == 0; spsid == 0~2; delta==-2~0;       //actual spsid_in_bs == 0
  //31st finish:next_spsid_in_bs == 1;

  const int32_t kiEncId = kiCurEncoderParaSetId;
  uint32_t uiNextIdInBs = sParaSetOffsetVariable->uiNextParaSetIdToUseInBs;

  //update current layer's pCodingParam
  sParaSetOffsetVariable->iParaSetIdDelta[kiEncId] = uiNextIdInBs -
      kiEncId;  //for current parameter set, change its id_delta
  //write pso pData for next update:
  sParaSetOffsetVariable->bUsedParaSetIdInBs[uiNextIdInBs] = true; //   update current used_id

  //prepare for next update:
  //   find the next avaibable iId
  ++uiNextIdInBs;
  if (uiNextIdInBs >= kuiMaxIdInBs) {
    uiNextIdInBs = 0;//ensure the SPS_ID wound not exceed MAX_SPS_COUNT
  }
  //   update next_id
  sParaSetOffsetVariable->uiNextParaSetIdToUseInBs = uiNextIdInBs;
}

int32_t WelsWriteOneSPS (sWelsEncCtx* pCtx, const int32_t kiSpsIdx, int32_t& iNalSize) {
  int iNal = pCtx->pOut->iNalIndex;
  WelsLoadNal (pCtx->pOut, NAL_UNIT_SPS, NRI_PRI_HIGHEST);

  WelsWriteSpsNal (&pCtx->pSpsArray[kiSpsIdx], &pCtx->pOut->sBsWrite,
                   & (pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_AVCSPS].iParaSetIdDelta[0]));
  WelsUnloadNal (pCtx->pOut);

  int32_t iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
                                   pCtx->iFrameBsSize - pCtx->iPosBsBuffer,//available buffer to be written, so need to substract the used length
                                   pCtx->pFrameBs + pCtx->iPosBsBuffer,
                                   &iNalSize);
  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

  pCtx->iPosBsBuffer += iNalSize;
  return ENC_RETURN_SUCCESS;
}

int32_t WelsWriteOnePPS (sWelsEncCtx* pCtx, const int32_t kiPpsIdx, int32_t& iNalSize) {
  //TODO
  int32_t iNal = pCtx->pOut->iNalIndex;
  /* generate picture parameter set */
  WelsLoadNal (pCtx->pOut, NAL_UNIT_PPS, NRI_PRI_HIGHEST);
  WelsWritePpsSyntax (&pCtx->pPPSArray[kiPpsIdx], &pCtx->pOut->sBsWrite,
                      ((SPS_PPS_LISTING != pCtx->pSvcParam->eSpsPpsIdStrategy)) ? (& (pCtx->sPSOVector)) : NULL);
  WelsUnloadNal (pCtx->pOut);

  int32_t iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
                                   pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
                                   pCtx->pFrameBs + pCtx->iPosBsBuffer,
                                   &iNalSize);
  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

  pCtx->iPosBsBuffer += iNalSize;
  return ENC_RETURN_SUCCESS;
}

void UpdatePpsList (sWelsEncCtx* pCtx) {
  assert (pCtx->iPpsNum <= MAX_DQ_LAYER_NUM);

  //Generate PPS LIST
  int32_t iPpsId = 0, iUsePpsNum = pCtx->iPpsNum;

  for (int32_t iIdrRound = 0; iIdrRound < MAX_PPS_COUNT; iIdrRound++) {
    for (iPpsId = 0; iPpsId < pCtx->iPpsNum; iPpsId++) {
      pCtx->sPSOVector.iPpsIdList[iPpsId][iIdrRound] = ((iIdrRound * iUsePpsNum + iPpsId) % MAX_PPS_COUNT);
    }
  }

  for (iPpsId = iUsePpsNum; iPpsId < MAX_PPS_COUNT; iPpsId++) {
    memcpy (& (pCtx->pPPSArray[iPpsId]), & (pCtx->pPPSArray[iPpsId % iUsePpsNum]), sizeof (SWelsPPS));
    pCtx->pPPSArray[iPpsId].iPpsId = iPpsId;
    pCtx->iPpsNum++;
  }

  assert (pCtx->iPpsNum == MAX_PPS_COUNT);
  pCtx->sPSOVector.uiInUsePpsNum = pCtx->iPpsNum;

}

/*!
 * \brief   write all parameter sets introduced in SVC extension
 * \return  writing results, success or error
 */
int32_t WelsWriteParameterSets (sWelsEncCtx* pCtx, int32_t* pNalLen, int32_t* pNumNal, int32_t* pTotalLength) {
  int32_t iSize = 0;
  int32_t iNal  = 0;
  int32_t iIdx  = 0;
  int32_t iId   = 0;
  int32_t iCountNal     = 0;
  int32_t iNalLength    = 0;
  int32_t iReturn = ENC_RETURN_SUCCESS;

  if (NULL == pCtx || NULL == pNalLen || NULL == pNumNal)
    return ENC_RETURN_UNEXPECTED;

  *pTotalLength = 0;
  /* write all SPS */
  iIdx = 0;
  while (iIdx < pCtx->iSpsNum) {
    // TODO (Sijia) wrap different operation of eSpsPpsIdStrategy to classes to hide the details
    if (INCREASING_ID == pCtx->pSvcParam->eSpsPpsIdStrategy) {
#if _DEBUG
      pCtx->sPSOVector.eSpsPpsIdStrategy = INCREASING_ID;
      assert (iIdx < MAX_DQ_LAYER_NUM);
#endif

      ParasetIdAdditionIdAdjust (& (pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_AVCSPS]),
                                 pCtx->pSpsArray[0].uiSpsId,
                                 MAX_SPS_COUNT);
    } else if (CONSTANT_ID == pCtx->pSvcParam->eSpsPpsIdStrategy) {
      memset (& (pCtx->sPSOVector), 0, sizeof (pCtx->sPSOVector));
    }

    /* generate sequence parameters set */
    iId = (SPS_LISTING & pCtx->pSvcParam->eSpsPpsIdStrategy) ? iIdx : 0;

    WelsWriteOneSPS (pCtx, iId, iNalLength);

    pNalLen[iCountNal] = iNalLength;
    iSize += iNalLength;

    ++ iIdx;
    ++ iCountNal;
  }

  /* write all Subset SPS */
  iIdx = 0;
  while (iIdx < pCtx->iSubsetSpsNum) {
    iNal = pCtx->pOut->iNalIndex;

    if (INCREASING_ID == pCtx->pSvcParam->eSpsPpsIdStrategy) {
#if _DEBUG
      pCtx->sPSOVector.eSpsPpsIdStrategy = INCREASING_ID;
      assert (iIdx < MAX_DQ_LAYER_NUM);
#endif

      ParasetIdAdditionIdAdjust (& (pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_SUBSETSPS]),
                                 pCtx->pSubsetArray[iIdx].pSps.uiSpsId,
                                 MAX_SPS_COUNT);
    }

    iId = iIdx;

    /* generate Subset SPS */
    WelsLoadNal (pCtx->pOut, NAL_UNIT_SUBSET_SPS, NRI_PRI_HIGHEST);

    WelsWriteSubsetSpsSyntax (&pCtx->pSubsetArray[iId], &pCtx->pOut->sBsWrite,
                              & (pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_SUBSETSPS].iParaSetIdDelta[0]));
    WelsUnloadNal (pCtx->pOut);

    iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
                             pCtx->iFrameBsSize - pCtx->iPosBsBuffer,//available buffer to be written, so need to substract the used length
                             pCtx->pFrameBs + pCtx->iPosBsBuffer,
                             &iNalLength);
    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
    pNalLen[iCountNal] = iNalLength;

    pCtx->iPosBsBuffer  += iNalLength;
    iSize               += iNalLength;

    ++ iIdx;
    ++ iCountNal;
  }

  /* write all PPS */
  if ((SPS_PPS_LISTING == pCtx->pSvcParam->eSpsPpsIdStrategy) && (pCtx->iPpsNum < MAX_PPS_COUNT)) {
    UpdatePpsList (pCtx);
  }

  iIdx = 0;
  while (iIdx < pCtx->iPpsNum) {
    if ((INCREASING_ID & pCtx->pSvcParam->eSpsPpsIdStrategy)) {
      //para_set_type = 2: PPS, use MAX_PPS_COUNT
      ParasetIdAdditionIdAdjust (&pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_PPS], pCtx->pPPSArray[iIdx].iPpsId,
                                 MAX_PPS_COUNT);
    }

    WelsWriteOnePPS (pCtx, iIdx, iNalLength);

    pNalLen[iCountNal] = iNalLength;
    iSize += iNalLength;

    ++ iIdx;
    ++ iCountNal;
  }

  *pNumNal = iCountNal;
  *pTotalLength = iSize;

  return ENC_RETURN_SUCCESS;
}

static inline int32_t AddPrefixNal (sWelsEncCtx* pCtx,
                                    SLayerBSInfo* pLayerBsInfo,
                                    int32_t* pNalLen,
                                    int32_t* pNalIdxInLayer,
                                    const EWelsNalUnitType keNalType,
                                    const EWelsNalRefIdc keNalRefIdc,
                                    int32_t& iPayloadSize) {
  int32_t iReturn = ENC_RETURN_SUCCESS;
  iPayloadSize = 0;

  if (keNalRefIdc != NRI_PRI_LOWEST) {
    WelsLoadNal (pCtx->pOut, NAL_UNIT_PREFIX, keNalRefIdc);

    WelsWriteSVCPrefixNal (&pCtx->pOut->sBsWrite, keNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == keNalType));

    WelsUnloadNal (pCtx->pOut);

    iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1],
                             &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt,
                             pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
                             pCtx->pFrameBs + pCtx->iPosBsBuffer,
                             &pNalLen[*pNalIdxInLayer]);
    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
    iPayloadSize = pNalLen[*pNalIdxInLayer];

    pCtx->iPosBsBuffer += iPayloadSize;

    (*pNalIdxInLayer) ++;
  } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension
    WelsLoadNal (pCtx->pOut, NAL_UNIT_PREFIX, keNalRefIdc);
    // No need write any syntax of prefix NAL Unit RBSP here
    WelsUnloadNal (pCtx->pOut);

    iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1],
                             &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt,
                             pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
                             pCtx->pFrameBs + pCtx->iPosBsBuffer,
                             &pNalLen[*pNalIdxInLayer]);
    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
    iPayloadSize = pNalLen[*pNalIdxInLayer];

    pCtx->iPosBsBuffer += iPayloadSize;

    (*pNalIdxInLayer) ++;
  }

  return ENC_RETURN_SUCCESS;
}

int32_t WritePadding (sWelsEncCtx* pCtx, int32_t iLen, int32_t& iSize) {
  int32_t i = 0;
  int32_t iNal = 0;
  SBitStringAux* pBs = NULL;
  int32_t iNalLen;

  iSize = 0;
  iNal  = pCtx->pOut->iNalIndex;
  pBs   = &pCtx->pOut->sBsWrite;  // SBitStringAux instance for non VCL NALs decoding

  if ((pBs->pEndBuf - pBs->pCurBuf) < iLen || iNal >= pCtx->pOut->iCountNals) {
#if GOM_TRACE_FLAG
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
             "[RC] paddingcal pBuffer overflow, bufferlen=%lld, paddinglen=%d, iNalIdx= %d, iCountNals= %d",
             static_cast<long long int> (pBs->pEndBuf - pBs->pCurBuf), iLen, iNal, pCtx->pOut->iCountNals);
#endif
    return ENC_RETURN_MEMOVERFLOWFOUND;
  }

  WelsLoadNal (pCtx->pOut, NAL_UNIT_FILLER_DATA, NRI_PRI_LOWEST);

  for (i = 0; i < iLen; i++) {
    BsWriteBits (pBs, 8, 0xff);
  }

  BsRbspTrailingBits (pBs);

  WelsUnloadNal (pCtx->pOut);
  int32_t iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], NULL,
                                   pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
                                   pCtx->pFrameBs + pCtx->iPosBsBuffer,
                                   &iNalLen);
  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

  pCtx->iPosBsBuffer += iNalLen;
  iSize              += iNalLen;

  return ENC_RETURN_SUCCESS;
}

/*
 * Force coding IDR as follows
 */
int32_t ForceCodingIDR (sWelsEncCtx* pCtx) {
  if (NULL == pCtx)
    return 1;

  pCtx->bEncCurFrmAsIdrFlag = true;
  pCtx->iCodingIndex = 0;
  pCtx->bCheckWindowStatusRefreshFlag = false;

  WelsLog (&pCtx->sLogCtx, WELS_LOG_INFO, "ForceCodingIDR at InputFrameCount=%d\n",
           pCtx->sEncoderStatistics.uiInputFrameCount);
  return 0;
}

int32_t WelsEncoderEncodeParameterSets (sWelsEncCtx* pCtx, void* pDst) {
  SFrameBSInfo* pFbi          = (SFrameBSInfo*)pDst;
  SLayerBSInfo* pLayerBsInfo  = &pFbi->sLayerInfo[0];
  int32_t iCountNal           = 0;
  int32_t iTotalLength        = 0;

  pLayerBsInfo->pBsBuf = pCtx->pFrameBs;
  pLayerBsInfo->pNalLengthInByte = pCtx->pOut->pNalLen;
  InitBits (&pCtx->pOut->sBsWrite, pCtx->pOut->pBsBuffer, pCtx->pOut->uiSize);

  pCtx->iPosBsBuffer = 0;
  int32_t iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iTotalLength);
  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

  pLayerBsInfo->uiSpatialId   = 0;
  pLayerBsInfo->uiTemporalId  = 0;
  pLayerBsInfo->uiQualityId   = 0;
  pLayerBsInfo->uiLayerType   = NON_VIDEO_CODING_LAYER;
  pLayerBsInfo->iNalCount     = iCountNal;

  //pCtx->eLastNalPriority      = NRI_PRI_HIGHEST;
  pFbi->iLayerNum             = 1;
  pFbi->eFrameType            = videoFrameTypeInvalid;

  WelsEmms();

  return ENC_RETURN_SUCCESS;
}

int32_t GetSubSequenceId (sWelsEncCtx* pCtx, EVideoFrameType eFrameType) {
  int32_t iSubSeqId = 0;
  if (eFrameType == videoFrameTypeIDR)
    iSubSeqId = 0;
  else if (eFrameType == videoFrameTypeI)
    iSubSeqId = 1;
  else if (eFrameType == videoFrameTypeP) {
    if (pCtx->bCurFrameMarkedAsSceneLtr)
      iSubSeqId = 2;
    else
      iSubSeqId = 3 + pCtx->uiTemporalId; //T0:3 T1:4 T2:5 T3:6
  } else
    iSubSeqId = 3 + MAX_TEMPORAL_LAYER_NUM;
  return iSubSeqId;
}

// writing parasets for (simulcast) svc
int32_t WriteSsvcParaset (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
                          SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
  int32_t iNonVclSize = 0, iCountNal = 0, iReturn;
  iReturn = WelsWriteParameterSets (pCtx, &pLayerBsInfo->pNalLengthInByte[0], &iCountNal, &iNonVclSize);
  WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

  pLayerBsInfo->uiSpatialId     = 0;
  pLayerBsInfo->uiTemporalId    = 0;
  pLayerBsInfo->uiQualityId     = 0;
  pLayerBsInfo->uiLayerType     = NON_VIDEO_CODING_LAYER;
  pLayerBsInfo->iNalCount       = iCountNal;

  //point to next pLayerBsInfo
  ++ pLayerBsInfo;
  ++ pCtx->pOut->iLayerBsIndex;
  pLayerBsInfo->pBsBuf           = pCtx->pFrameBs + pCtx->iPosBsBuffer;
  pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;

  //update for external countings
  ++ iLayerNum;
  iFrameSize += iNonVclSize;
  return iReturn;
}

// writing parasets for simulcast avc
int32_t WriteSavcParaset (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
                          SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
  int32_t iNonVclSize = 0, iCountNal = 0, iReturn;

  // write SPS
  iNonVclSize = 0;

  assert ((kiSpatialNum == pCtx->iSpsNum) || (SPS_LISTING & pCtx->pSvcParam->eSpsPpsIdStrategy));

  for (int32_t iIdx = 0; iIdx < pCtx->iSpsNum; iIdx++) {
    //writing one NAL
    int32_t iNalSize = 0;
    iReturn = WelsWriteOneSPS (pCtx, iIdx, iNalSize);
    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

    pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
    iNonVclSize += iNalSize;
    iCountNal = 1;
    //finish writing one NAL

    pLayerBsInfo->uiSpatialId   = iIdx;
    pLayerBsInfo->uiTemporalId  = 0;
    pLayerBsInfo->uiQualityId   = 0;
    pLayerBsInfo->uiLayerType   = NON_VIDEO_CODING_LAYER;
    pLayerBsInfo->iNalCount     = iCountNal;

    //point to next pLayerBsInfo
    ++ pLayerBsInfo;
    ++ pCtx->pOut->iLayerBsIndex;
    pLayerBsInfo->pBsBuf           = pCtx->pFrameBs + pCtx->iPosBsBuffer;
    pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
    //update for external countings
    iCountNal = 0;
    ++ iLayerNum;

  }

  // write PPS

  //TODO: under new strategy, will PPS be correctly updated?

  for (int32_t iIdx = 0; iIdx < pCtx->iPpsNum; iIdx++) {
    //writing one NAL
    int32_t iNalSize = 0;
    iReturn = WelsWriteOnePPS (pCtx, iIdx, iNalSize);
    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

    pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
    iNonVclSize += iNalSize;
    iCountNal = 1;
    //finish writing one NAL

    pLayerBsInfo->uiSpatialId   = iIdx;
    pLayerBsInfo->uiTemporalId  = 0;
    pLayerBsInfo->uiQualityId   = 0;
    pLayerBsInfo->uiLayerType   = NON_VIDEO_CODING_LAYER;
    pLayerBsInfo->iNalCount     = iCountNal;

    //point to next pLayerBsInfo
    ++ pLayerBsInfo;
    ++ pCtx->pOut->iLayerBsIndex;
    pLayerBsInfo->pBsBuf           = pCtx->pFrameBs + pCtx->iPosBsBuffer;
    pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
    //update for external countings
    iCountNal = 0;
    ++ iLayerNum;
  }

  // to check number of layers / nals / slices dependencies
  if (iLayerNum > MAX_LAYER_NUM_OF_FRAME) {
    WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WriteSavcParaset(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
             iLayerNum, MAX_LAYER_NUM_OF_FRAME);
    return 1;
  }

  iFrameSize += iNonVclSize;
  return iReturn;
}

//cover the logic of simulcast avc + sps_pps_listing
int32_t WriteSavcParaset_Listing (sWelsEncCtx* pCtx, const int32_t kiSpatialNum,
                                  SLayerBSInfo*& pLayerBsInfo, int32_t& iLayerNum, int32_t& iFrameSize) {
  int32_t iNonVclSize = 0, iCountNal = 0, iReturn;

  // write SPS
  iNonVclSize = 0;

  for (int32_t iSpatialId = 0; iSpatialId < kiSpatialNum; iSpatialId++) {
    iCountNal = 0;
    for (int32_t iIdx = 0; iIdx < pCtx->iSpsNum; iIdx++) {
      //writing one NAL
      int32_t iNalSize = 0;
      iReturn = WelsWriteOneSPS (pCtx, iIdx, iNalSize);
      WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

      pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
      iNonVclSize += iNalSize;
      iCountNal ++;
      //finish writing one NAL
    }

    pLayerBsInfo->uiSpatialId   = iSpatialId;
    pLayerBsInfo->uiTemporalId  = 0;
    pLayerBsInfo->uiQualityId   = 0;
    pLayerBsInfo->uiLayerType   = NON_VIDEO_CODING_LAYER;
    pLayerBsInfo->iNalCount     = iCountNal;

    //point to next pLayerBsInfo
    ++ pLayerBsInfo;
    ++ pCtx->pOut->iLayerBsIndex;
    pLayerBsInfo->pBsBuf           = pCtx->pFrameBs + pCtx->iPosBsBuffer;
    pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
    //update for external countings
    iCountNal = 0;
    ++ iLayerNum;
  }

  // write PPS
  if ((SPS_PPS_LISTING == pCtx->pSvcParam->eSpsPpsIdStrategy) && (pCtx->iPpsNum < MAX_PPS_COUNT)) {
    UpdatePpsList (pCtx);
  }

  //TODO: under new strategy, will PPS be correctly updated?
  for (int32_t iSpatialId = 0; iSpatialId < kiSpatialNum; iSpatialId++) {
    iCountNal = 0;
    for (int32_t iIdx = 0; iIdx < pCtx->iPpsNum; iIdx++) {
      //writing one NAL
      int32_t iNalSize = 0;
      iReturn = WelsWriteOnePPS (pCtx, iIdx, iNalSize);
      WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)

      pLayerBsInfo->pNalLengthInByte[iCountNal] = iNalSize;
      iNonVclSize += iNalSize;
      iCountNal ++;
      //finish writing one NAL
    }

    pLayerBsInfo->uiSpatialId   = iSpatialId;
    pLayerBsInfo->uiTemporalId  = 0;
    pLayerBsInfo->uiQualityId   = 0;
    pLayerBsInfo->uiLayerType   = NON_VIDEO_CODING_LAYER;
    pLayerBsInfo->iNalCount     = iCountNal;

    //point to next pLayerBsInfo
    ++ pLayerBsInfo;
    ++ pCtx->pOut->iLayerBsIndex;
    pLayerBsInfo->pBsBuf           = pCtx->pFrameBs + pCtx->iPosBsBuffer;
    pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;
    //update for external countings
    iCountNal = 0;
    ++ iLayerNum;
  }

  // to check number of layers / nals / slices dependencies
  if (iLayerNum > MAX_LAYER_NUM_OF_FRAME) {
    WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WriteSavcParaset(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
             iLayerNum, MAX_LAYER_NUM_OF_FRAME);
    return ENC_RETURN_UNEXPECTED;
  }

  iFrameSize += iNonVclSize;
  return iReturn;
}

void StackBackEncoderStatus (sWelsEncCtx* pEncCtx,
                             EVideoFrameType keFrameType) {
  // for bitstream writing
  pEncCtx->iPosBsBuffer        = 0;   // reset bs pBuffer position
  pEncCtx->pOut->iNalIndex     = 0;   // reset NAL index
  pEncCtx->pOut->iLayerBsIndex = 0;   // reset index of Layer Bs

  InitBits (&pEncCtx->pOut->sBsWrite, pEncCtx->pOut->pBsBuffer, pEncCtx->pOut->uiSize);
  if ((keFrameType == videoFrameTypeP) || (keFrameType == videoFrameTypeI)) {
    pEncCtx->iFrameIndex --;
    if (pEncCtx->iPOC != 0) {
      pEncCtx->iPOC -= 2;
    } else {
      pEncCtx->iPOC = (1 << pEncCtx->pSps->iLog2MaxPocLsb) - 2;
    }

    LoadBackFrameNum(pEncCtx);
    pEncCtx->eNalType     = NAL_UNIT_CODED_SLICE;
    pEncCtx->eSliceType   = P_SLICE;
    //pEncCtx->eNalPriority = pEncCtx->eLastNalPriority; //not need this since eNalPriority will be updated at the beginning of coding a frame
  } else if (keFrameType == videoFrameTypeIDR) {
    pEncCtx->uiIdrPicId --;

    //set the next frame to be IDR
    ForceCodingIDR (pEncCtx);
  } else { // B pictures are not supported now, any else?
    assert (0);
  }

  // no need to stack back RC info since the info is still useful for later RQ model calculation
  // no need to stack back MB slicing info for dynamic balancing, since the info is still refer-able
}

void ClearFrameBsInfo (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi) {
  pFbi->sLayerInfo[0].pBsBuf           = pCtx->pFrameBs;
  pFbi->sLayerInfo[0].pNalLengthInByte = pCtx->pOut->pNalLen;

  for (int i = 0; i < pFbi->iLayerNum; i++) {
    pFbi->sLayerInfo[i].iNalCount = 0;
  }
  pFbi->iLayerNum = 0;
  pFbi->iFrameSizeInBytes = 0;
  pFbi->eFrameType = videoFrameTypeSkip;
}

/*!
 * \brief   core svc encoding process
 *
 * \pParam  pCtx            sWelsEncCtx*, encoder context
 * \pParam  pFbi            FrameBSInfo*
 * \pParam  pSrcPic         Source Picture
 * \return  EFrameType (videoFrameTypeIDR/videoFrameTypeI/videoFrameTypeP)
 */
int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, SFrameBSInfo* pFbi, const SSourcePicture* pSrcPic) {
  if (pCtx == NULL) {
    return ENC_RETURN_MEMALLOCERR;
  }
  SLayerBSInfo* pLayerBsInfo            = &pFbi->sLayerInfo[0];
  SWelsSvcCodingParam* pSvcParam        = pCtx->pSvcParam;
  SSpatialPicIndex* pSpatialIndexMap = &pCtx->sSpatialIndexMap[0];
#if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC)
  SPicture* fsnr                = NULL;
#endif//ENABLE_FRAME_DUMP || ENABLE_PSNR_CALC
  SPicture* pEncPic             = NULL; // to be decided later
  int32_t iDidList[MAX_DEPENDENCY_LAYER] = {0};
  int32_t iLayerNum             = 0;
  int32_t iLayerSize            = 0;
  int32_t iSpatialNum           = 0; // available count number of spatial layers due to frame size changed in this given frame
  int32_t iSpatialIdx           = 0; // iIndex of spatial layers due to frame size changed in this given frame
  int32_t iFrameSize            = 0;
  int32_t iNalIdxInLayer        = 0;
  int32_t iCountNal             = 0;
  EVideoFrameType eFrameType    = videoFrameTypeInvalid;
  int32_t iCurWidth             = 0;
  int32_t iCurHeight            = 0;
  EWelsNalUnitType eNalType     = NAL_UNIT_UNSPEC_0;
  EWelsNalRefIdc eNalRefIdc     = NRI_PRI_LOWEST;
  int8_t iCurDid                = 0;
  int8_t iCurTid                = 0;
  bool bAvcBased                = false;
  SLogContext* pLogCtx = & (pCtx->sLogCtx);
#if defined(ENABLE_PSNR_CALC)
  float fSnrY = .0f, fSnrU = .0f, fSnrV = .0f;
#endif//ENABLE_PSNR_CALC

#if defined(_DEBUG)
  int32_t i = 0, j = 0, k = 0;
#endif//_DEBUG

  pCtx->iEncoderError = ENC_RETURN_SUCCESS;
  pCtx->bCurFrameMarkedAsSceneLtr = false;
  pFbi->iLayerNum = 0; // for initialization
  pFbi->uiTimeStamp = pSrcPic->uiTimeStamp;
  // perform csc/denoise/downsample/padding, generate spatial layers
  iSpatialNum = pCtx->pVpp->BuildSpatialPicList (pCtx, pSrcPic);
  if (pCtx->pFuncList->pfRc.pfWelsUpdateMaxBrWindowStatus) {
    pCtx->pFuncList->pfRc.pfWelsUpdateMaxBrWindowStatus (pCtx, iSpatialNum, pSrcPic->uiTimeStamp);
  }

  if (iSpatialNum < 1) { // skip due to temporal layer settings (different frame rate)
    ++ pCtx->iCodingIndex;
    pFbi->eFrameType = videoFrameTypeSkip;
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
             "[Rc] Frame timestamp = %lld, skip one frame due to preprocessing return (temporal layer settings or else), continual skipped %d frames",
             pSrcPic->uiTimeStamp, pCtx->iContinualSkipFrames);
    return ENC_RETURN_SUCCESS;
  }

  eFrameType = DecideFrameType (pCtx, iSpatialNum);
  if (eFrameType == videoFrameTypeSkip) {
    if (pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip)
      pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip (pCtx, iSpatialNum);
    pFbi->eFrameType = eFrameType;
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
             "[Rc] Frame timestamp = %lld, skip one frame due to target_br, continual skipped %d frames",
             pSrcPic->uiTimeStamp, pCtx->iContinualSkipFrames);
    return ENC_RETURN_SUCCESS;
  }

  //loop each layer to check if have skip frame when RC and frame skip enable
  if (pCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr) {
    bool bSkip = pCtx->pFuncList->pfRc.pfWelsCheckSkipBasedMaxbr (pCtx, iSpatialNum, eFrameType,
                 (uint32_t)pSrcPic->uiTimeStamp);
    if (bSkip) {
      pFbi->eFrameType = videoFrameTypeSkip;
      WelsLog (& (pCtx->sLogCtx), WELS_LOG_DEBUG,
               "[Rc] Frame timestamp = %lld, skip one frame due to max_br, continual skipped %d frames",
               pSrcPic->uiTimeStamp, pCtx->iContinualSkipFrames);
      return ENC_RETURN_SUCCESS;
    }
  }

  pCtx->iContinualSkipFrames = 0;
  InitFrameCoding (pCtx, eFrameType);

  iCurTid = GetTemporalLevel (&pSvcParam->sDependencyLayers[pSpatialIndexMap->iDid], pCtx->iCodingIndex,
                              pSvcParam->uiGopSize);
  pCtx->uiTemporalId = iCurTid;

  pLayerBsInfo->pBsBuf = pCtx->pFrameBs ;
  pLayerBsInfo->pNalLengthInByte = pCtx->pOut->pNalLen;

  if (eFrameType == videoFrameTypeIDR) {
    ++ pCtx->uiIdrPicId;
    // write parameter sets bitstream or SEI/SSEI (if any) here
    // TODO: use function pointer instead
    if (! (SPS_LISTING & pCtx->pSvcParam->eSpsPpsIdStrategy)) {
      pCtx->iEncoderError = ((!pSvcParam->bSimulcastAVC)
                             ? (WriteSsvcParaset (pCtx, iSpatialNum, pLayerBsInfo, iLayerNum, iFrameSize))
                             : (WriteSavcParaset (pCtx, iSpatialNum, pLayerBsInfo, iLayerNum, iFrameSize)));
    } else {
      pCtx->iEncoderError = WriteSavcParaset_Listing (pCtx, iSpatialNum, pLayerBsInfo, iLayerNum, iFrameSize);
    }
    WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
  }

  pCtx->pCurDqLayer             = pCtx->ppDqLayerList[pSpatialIndexMap->iDid];
  pCtx->pCurDqLayer->pRefLayer  = NULL;

  while (iSpatialIdx < iSpatialNum) {
    const int32_t iDidIdx       = (pSpatialIndexMap + iSpatialIdx)->iDid;       // get iDid
    SSpatialLayerConfig* pParam = &pSvcParam->sSpatialLayers[iDidIdx];
    int32_t  iDecompositionStages = pSvcParam->sDependencyLayers[iDidIdx].iDecompositionStages;
    pCtx->uiDependencyId        = iCurDid = (int8_t)iDidIdx;
    pCtx->pVpp->AnalyzeSpatialPic (pCtx, iDidIdx);

    pCtx->pEncPic               = pEncPic = (pSpatialIndexMap + iSpatialIdx)->pSrc;
    pCtx->pEncPic->iPictureType = pCtx->eSliceType;
    pCtx->pEncPic->iFramePoc    = pCtx->iPOC;

    iCurWidth   = pParam->iVideoWidth;
    iCurHeight  = pParam->iVideoHeight;

    iDidList[iSpatialIdx]       = iCurDid;

    // Encoding this picture might mulitiple sQualityStat layers potentially be encoded as followed
    switch (pParam->sSliceArgument.uiSliceMode) {
    case SM_FIXEDSLCNUM_SLICE: {
      if ((iCurDid > 0) && (pSvcParam->iMultipleThreadIdc > 1) &&
          (pSvcParam->sSpatialLayers[iCurDid].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE
           && pSvcParam->bUseLoadBalancing
           && pSvcParam->iMultipleThreadIdc >= pSvcParam->sSpatialLayers[iCurDid].sSliceArgument.uiSliceNum)
         )
        AdjustEnhanceLayer (pCtx, iCurDid);
      break;
    }
    case SM_SIZELIMITED_SLICE: {
      int32_t iPicIPartitionNum = PicPartitionNumDecision (pCtx);
      // MT compatibility
      pCtx->iActiveThreadsNum =
        iPicIPartitionNum; // we try to active number of threads, equal to number of picture partitions
      WelsInitCurrentDlayerMltslc (pCtx, iPicIPartitionNum);
      break;
    }
    default: {
      break;
    }
    }

    /* coding each spatial layer, only one sQualityStat layer within spatial support */
    int32_t iSliceCount = 1;
    if (iLayerNum >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info writing as follows
      WelsLog (pLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) overflow(max:%d)!", iLayerNum,
               MAX_LAYER_NUM_OF_FRAME);
      return ENC_RETURN_UNSUPPORTED_PARA;
    }

    iNalIdxInLayer  = 0;
    bAvcBased       = ((pSvcParam->bSimulcastAVC) || (iCurDid == BASE_DEPENDENCY_ID));
    pCtx->bNeedPrefixNalFlag    = ((!pSvcParam->bSimulcastAVC) && (bAvcBased &&
                                 (pSvcParam->bPrefixNalAddingCtrl ||
                                  (pSvcParam->iSpatialLayerNum > 1))));

    if (eFrameType == videoFrameTypeP) {
      eNalType = bAvcBased ? NAL_UNIT_CODED_SLICE : NAL_UNIT_CODED_SLICE_EXT;
    } else if (eFrameType == videoFrameTypeIDR) {
      eNalType = bAvcBased ? NAL_UNIT_CODED_SLICE_IDR : NAL_UNIT_CODED_SLICE_EXT;
    }
    if (iCurTid == 0 || pCtx->eSliceType == I_SLICE)
      eNalRefIdc = NRI_PRI_HIGHEST;
    else if (iCurTid == iDecompositionStages)
      eNalRefIdc = NRI_PRI_LOWEST;
    else if (1 + iCurTid == iDecompositionStages)
      eNalRefIdc = NRI_PRI_LOW;
    else // more details for other temporal layers?
      eNalRefIdc = NRI_PRI_HIGHEST;
    pCtx->eNalType = eNalType;
    pCtx->eNalPriority = eNalRefIdc;

    pCtx->pDecPic               = pCtx->ppRefPicListExt[iCurDid]->pNextBuffer;
#if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC)
    fsnr                        = pCtx->pDecPic;
#endif//#if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC)
    pCtx->pDecPic->iPictureType = pCtx->eSliceType;
    pCtx->pDecPic->iFramePoc    = pCtx->iPOC;

    WelsInitCurrentLayer (pCtx, iCurWidth, iCurHeight);

    pCtx->pFuncList->pMarkPic (pCtx);
    if (!pCtx->pFuncList->pBuildRefList (pCtx, pCtx->iPOC, 0)) {
      WelsLog (pLogCtx, WELS_LOG_WARNING,
               "WelsEncoderEncodeExt(), WelsBuildRefList failed for P frames, pCtx->iNumRef0= %d. ForceCodingIDR!",
               pCtx->iNumRef0);
      eFrameType = videoFrameTypeIDR;
      pCtx->iEncoderError = ENC_RETURN_CORRECTED;
      break;
    }
    if (pCtx->eSliceType != I_SLICE) {
      pCtx->pFuncList->pAfterBuildRefList (pCtx);
    }
#ifdef LONG_TERM_REF_DUMP
    DumpRef (pCtx);
#endif

    if (pSvcParam->iRCMode != RC_OFF_MODE)
      pCtx->pVpp->AnalyzePictureComplexity (pCtx, pCtx->pEncPic, ((pCtx->eSliceType == P_SLICE)
                                            && (pCtx->iNumRef0 > 0)) ? pCtx->pRefList0[0] : NULL,
                                            iCurDid, (pCtx->eSliceType == P_SLICE) && pSvcParam->bEnableBackgroundDetection);
    WelsUpdateRefSyntax (pCtx,  pCtx->iPOC,
                         eFrameType); //get reordering syntax used for writing slice header and transmit to encoder.
    PrefetchReferencePicture (pCtx, eFrameType); // update reference picture for current pDq layer

    pCtx->pFuncList->pfRc.pfWelsRcPictureInit (pCtx, pSrcPic->uiTimeStamp);
    PreprocessSliceCoding (pCtx); // MUST be called after pfWelsRcPictureInit() and WelsInitCurrentLayer()

    //TODO Complexity Calculation here for screen content
    iLayerSize = 0;

    if (SM_SINGLE_SLICE == pParam->sSliceArgument.uiSliceMode) { // only one slice within a sQualityStat layer
      int32_t iSliceSize = 0;
      int32_t iPayloadSize = 0;

      if (pCtx->bNeedPrefixNalFlag) {
        pCtx->iEncoderError = AddPrefixNal (pCtx, pLayerBsInfo, &pLayerBsInfo->pNalLengthInByte[0], &iNalIdxInLayer, eNalType,
                                            eNalRefIdc,
                                            iPayloadSize);
        WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
        iLayerSize += iPayloadSize;
      }

      WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc);

      //the following line is to fix a problem with a specific setting as in test DiffSlicingInDlayerMixed:
      //      (multi-th on with SM_SINGLE_SLICE in one of the D layers)
      //TODO: this may not be needed any more after the slice buffer refactoring
      pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[0].pSliceBsa = &(pCtx->pOut->sBsWrite);

      pCtx->iEncoderError = WelsCodeOneSlice (pCtx, 0, eNalType);
      WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)

      WelsUnloadNal (pCtx->pOut);

      pCtx->iEncoderError = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1],
                                           &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt,
                                           pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
                                           pCtx->pFrameBs + pCtx->iPosBsBuffer,
                                           &pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]);
      WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
      iSliceSize = pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer];

      iLayerSize += iSliceSize;
      pCtx->iPosBsBuffer               += iSliceSize;
      pLayerBsInfo->uiLayerType         = VIDEO_CODING_LAYER;
      pLayerBsInfo->uiSpatialId         = iCurDid;
      pLayerBsInfo->uiTemporalId        = iCurTid;
      pLayerBsInfo->uiQualityId         = 0;
      pLayerBsInfo->iNalCount           = ++ iNalIdxInLayer;
    }
    // for dynamic slicing single threading..
    else if ((SM_SIZELIMITED_SLICE == pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc <= 1)) {
      const int32_t kiLastMbInFrame = pCtx->pCurDqLayer->pSliceEncCtx->iMbNumInFrame;
      pCtx->iEncoderError = WelsCodeOnePicPartition (pCtx, pFbi, pLayerBsInfo, &iNalIdxInLayer, &iLayerSize, 0,
                            kiLastMbInFrame, 0);
      WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
    } else {
      //other multi-slice uiSliceMode
      int32_t iRet = 0;
      // THREAD_FULLY_FIRE_MODE/THREAD_PICK_UP_MODE for any mode of non-SM_SIZELIMITED_SLICE
      if ((SM_SIZELIMITED_SLICE != pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) {
        iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer->pSliceEncCtx);
        if (iLayerNum + 1 >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info for further writing as followed
          WelsLog (pLogCtx, WELS_LOG_ERROR,
                   "WelsEncoderEncodeExt(), iLayerNum(%d) overflow(max:%d) at iDid= %d uiSliceMode= %d, iSliceCount= %d!",
                   iLayerNum, MAX_LAYER_NUM_OF_FRAME, iCurDid, pParam->sSliceArgument.uiSliceMode, iSliceCount);
          return ENC_RETURN_UNSUPPORTED_PARA;
        }
        if (iSliceCount <= 1) {
          WelsLog (pLogCtx, WELS_LOG_ERROR,
                   "WelsEncoderEncodeExt(), iSliceCount(%d) from GetCurrentSliceNum() is untrusted due stack/heap crupted!",
                   iSliceCount);
          return ENC_RETURN_UNEXPECTED;
        }
        //note: the old codes are removed at commit: 3e0ee69
        pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
        pLayerBsInfo->uiLayerType   = VIDEO_CODING_LAYER;
        pLayerBsInfo->uiSpatialId   = pCtx->uiDependencyId;
        pLayerBsInfo->uiTemporalId  = pCtx->uiTemporalId;
        pLayerBsInfo->uiQualityId   = 0;
        pLayerBsInfo->iNalCount     = 0;
        pCtx->pSliceBs[0].pBs = pLayerBsInfo->pBsBuf;

        pCtx->pTaskManage->ExecuteTasks();
        iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount);
      }
      // THREAD_FULLY_FIRE_MODE && SM_SIZELIMITED_SLICE
      else if ((SM_SIZELIMITED_SLICE == pParam->sSliceArgument.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) {
        const int32_t kiPartitionCnt = pCtx->iActiveThreadsNum; //pSvcParam->iCountThreadsNum;

        // to fire slice coding threads
        iRet = FiredSliceThreads (pCtx, &pCtx->pSliceThreading->pThreadPEncCtx[0],
                                  &pCtx->pSliceThreading->pReadySliceCodingEvent[0],
                                  &pCtx->pSliceThreading->pThreadMasterEvent[0],
                                  pFbi, kiPartitionCnt, pCtx->pCurDqLayer->pSliceEncCtx, true);
        if (iRet) {
          WelsLog (pLogCtx, WELS_LOG_ERROR,
                   "[MT] WelsEncoderEncodeExt(), FiredSliceThreads return(%d) failed and exit encoding frame, iCountThreadsNum= %d, iSliceCount= %d, uiSliceMode= %d, iMultipleThreadIdc= %d!!",
                   iRet, pSvcParam->iCountThreadsNum, iSliceCount, pParam->sSliceArgument.uiSliceMode, pSvcParam->iMultipleThreadIdc);
          return ENC_RETURN_UNEXPECTED;
        }

        WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0],
                                           &pCtx->pSliceThreading->pSliceCodedMasterEvent);
        WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)

        iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt);
      } else { // for non-dynamic-slicing mode single threading branch..
        const bool bNeedPrefix = pCtx->bNeedPrefixNalFlag;
        int32_t iSliceIdx = 0;

        iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer->pSliceEncCtx);
        while (iSliceIdx < iSliceCount) {
          int32_t iSliceSize    = 0;
          int32_t iPayloadSize  = 0;
          if (bNeedPrefix) {
            pCtx->iEncoderError = AddPrefixNal (pCtx, pLayerBsInfo, &pLayerBsInfo->pNalLengthInByte[0], &iNalIdxInLayer, eNalType,
                                                eNalRefIdc,
                                                iPayloadSize);
            WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
            iLayerSize += iPayloadSize;
          }

          WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc);
          pCtx->iEncoderError = WelsCodeOneSlice (pCtx, iSliceIdx, eNalType);
          WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)

          WelsUnloadNal (pCtx->pOut);

          pCtx->iEncoderError = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1],
                                               &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt,
                                               pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
                                               pCtx->pFrameBs + pCtx->iPosBsBuffer, &pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]);
          WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)
          iSliceSize = pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer];

          pCtx->iPosBsBuffer += iSliceSize;
          iLayerSize         += iSliceSize;

#if defined(SLICE_INFO_OUTPUT)
          fprintf (stderr,
                   "@slice=%-6d sliceType:%c idc:%d size:%-6d\n",
                   iSliceIdx,
                   (pCtx->eSliceType == P_SLICE ? 'P' : 'I'),
                   eNalRefIdc,
                   iSliceSize);
#endif//SLICE_INFO_OUTPUT
          ++ iNalIdxInLayer;
          ++ iSliceIdx;
        }

        pLayerBsInfo->uiLayerType       = VIDEO_CODING_LAYER;
        pLayerBsInfo->uiSpatialId       = iCurDid;
        pLayerBsInfo->uiTemporalId      = iCurTid;
        pLayerBsInfo->uiQualityId       = 0;
        pLayerBsInfo->iNalCount         = iNalIdxInLayer;
      }
    }

    if (NULL != pCtx->pFuncList->pfRc.pfWelsRcPostFrameSkipping
        && pCtx->pFuncList->pfRc.pfWelsRcPostFrameSkipping (pCtx, iCurDid, pSrcPic->uiTimeStamp)) {

      StackBackEncoderStatus (pCtx, eFrameType);
      ClearFrameBsInfo (pCtx, pFbi);

      iFrameSize = 0;
      iLayerSize = 0;
      iLayerNum = 0;

      if (pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip) {
        pCtx->pFuncList->pfRc.pfWelsUpdateBufferWhenSkip (pCtx, iSpatialNum);
      }

      WelsRcPostFrameSkippedUpdate(pCtx, iCurDid);
      WelsLog (& (pCtx->sLogCtx), WELS_LOG_INFO,
               "[Rc] Frame timestamp = %lld, skip one frame due to post skip, continual skipped %d frames",
               pSrcPic->uiTimeStamp, pCtx->iContinualSkipFrames);
      pCtx->iEncoderError = ENC_RETURN_SUCCESS;
      return ENC_RETURN_SUCCESS;
    }

    // deblocking filter
    if (
      (!pCtx->pCurDqLayer->bDeblockingParallelFlag) &&
#if !defined(ENABLE_FRAME_DUMP)
      ((eNalRefIdc != NRI_PRI_LOWEST) && (pSvcParam->sDependencyLayers[iDidIdx].iHighestTemporalId == 0
                                          || iCurTid < pSvcParam->sDependencyLayers[iDidIdx].iHighestTemporalId)) &&
#endif//!ENABLE_FRAME_DUMP
      true
    ) {
      PerformDeblockingFilter (pCtx);
    }

    pCtx->pFuncList->pfRc.pfWelsRcPictureInfoUpdate (pCtx, iLayerSize);
    RcTraceFrameBits (pCtx, pSrcPic->uiTimeStamp);
    pCtx->pDecPic->iFrameAverageQp = pCtx->pWelsSvcRc[iDidIdx].iAverageFrameQp;

    //update scc related
    pCtx->pFuncList->pfUpdateFMESwitch (pCtx->pCurDqLayer);

    // reference picture list update
    if (eNalRefIdc != NRI_PRI_LOWEST) {
      if (!pCtx->pFuncList->pUpdateRefList (pCtx)) {
        WelsLog (pLogCtx, WELS_LOG_WARNING, "WelsEncoderEncodeExt(), WelsUpdateRefList failed. ForceCodingIDR!");
        //the above is to set the next frame to be IDR
        pCtx->iEncoderError = ENC_RETURN_CORRECTED;
        break;
      }
    }

    iFrameSize += iLayerSize;
    //check MinCr
    {
      int32_t iImageSize = (pParam->iVideoWidth * pParam->iVideoHeight * 3) >> 1;
      int32_t iMinCr = g_ksLevelLimits[pParam->uiLevelIdc - 1].uiMinCR;
      if (iFrameSize > (iImageSize / iMinCr))
        WelsLog (pLogCtx, WELS_LOG_WARNING,
                 "WelsEncoderEncodeExt()MinCr Checking,codec bitstream size is larger than Level limitation");
    }
#ifdef ENABLE_FRAME_DUMP
    if (iCurDid + 1 < pSvcParam->iSpatialLayerNum) {
      DumpDependencyRec (fsnr, &pSvcParam->sDependencyLayers[iCurDid].sRecFileName[0], iCurDid,
                         pCtx->bDependencyRecFlag[iCurDid], pCtx->pCurDqLayer);
      pCtx->bDependencyRecFlag[iCurDid] = true;
    }
#endif//ENABLE_FRAME_DUMP

#if defined(ENABLE_PSNR_CALC)
    fSnrY = WelsCalcPsnr (fsnr->pData[0],
                          fsnr->iLineSize[0],
                          pEncPic->pData[0],
                          pEncPic->iLineSize[0],
                          iCurWidth,
                          iCurHeight);
    fSnrU = WelsCalcPsnr (fsnr->pData[1],
                          fsnr->iLineSize[1],
                          pEncPic->pData[1],
                          pEncPic->iLineSize[1],
                          (iCurWidth >> 1),
                          (iCurHeight >> 1));
    fSnrV = WelsCalcPsnr (fsnr->pData[2],
                          fsnr->iLineSize[2],
                          pEncPic->pData[2],
                          pEncPic->iLineSize[2],
                          (iCurWidth >> 1),
                          (iCurHeight >> 1));
#endif//ENABLE_PSNR_CALC

#if defined(LAYER_INFO_OUTPUT)
    fprintf (stderr, "%2s %5d: %-5d %2s   T%1d D%1d Q%-2d  QP%3d   Y%2.2f  U%2.2f  V%2.2f  %8d bits\n",
             (iSpatialIdx == 0) ? "#AU" : "   ",
             pCtx->iPOC,
             pCtx->iFrameNum,
             (uiFrameType == videoFrameTypeI || uiFrameType == videoFrameTypeIDR) ? "I" : "P",
             iCurTid,
             iCurDid,
             0,
             pCtx->pWelsSvcRc[pCtx->uiDependencyId].iAverageFrameQp,
             fSnrY,
             fSnrU,
             fSnrV,
             (iLayerSize << 3));
#endif//LAYER_INFO_OUTPUT

#if defined(STAT_OUTPUT)

#if defined(ENABLE_PSNR_CALC)
    {
      pCtx->sStatData[iCurDid][0].sQualityStat.rYPsnr[pCtx->eSliceType] += fSnrY;
      pCtx->sStatData[iCurDid][0].sQualityStat.rUPsnr[pCtx->eSliceType] += fSnrU;
      pCtx->sStatData[iCurDid][0].sQualityStat.rVPsnr[pCtx->eSliceType] += fSnrV;
    }
#endif//ENABLE_PSNR_CALC

#if defined(MB_TYPES_CHECK) //091025, frame output
    if (pCtx->eSliceType == P_SLICE) {
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Intra4x4] += pCtx->sPerInfo.iMbCount[P_SLICE][Intra4x4];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Intra16x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Intra16x16];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter16x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter16x16];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter16x8] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter16x8];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter8x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter8x16];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter8x8] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter8x8];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][PSkip] += pCtx->sPerInfo.iMbCount[P_SLICE][PSkip];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][8] += pCtx->sPerInfo.iMbCount[P_SLICE][8];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][9] += pCtx->sPerInfo.iMbCount[P_SLICE][9];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][10] += pCtx->sPerInfo.iMbCount[P_SLICE][10];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][11] += pCtx->sPerInfo.iMbCount[P_SLICE][11];
    } else if (pCtx->eSliceType == I_SLICE) {
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][Intra4x4] += pCtx->sPerInfo.iMbCount[I_SLICE][Intra4x4];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][Intra16x16] += pCtx->sPerInfo.iMbCount[I_SLICE][Intra16x16];
      pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][7] += pCtx->sPerInfo.iMbCount[I_SLICE][7];
    }

    memset (pCtx->sPerInfo.iMbCount[P_SLICE], 0, 18 * sizeof (int32_t));
    memset (pCtx->sPerInfo.iMbCount[I_SLICE], 0, 18 * sizeof (int32_t));

#endif//MB_TYPES_CHECK
    {
      ++ pCtx->sStatData[iCurDid][0].sSliceData.iSliceCount[pCtx->eSliceType]; // for multiple slices coding
      pCtx->sStatData[iCurDid][0].sSliceData.iSliceSize[pCtx->eSliceType] += (iLayerSize << 3); // bits
    }
#endif//STAT_OUTPUT

    iCountNal = pLayerBsInfo->iNalCount;
    ++ iLayerNum;
    ++ pLayerBsInfo;
    ++ pCtx->pOut->iLayerBsIndex;

    pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer;
    pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + iCountNal;

    if (pSvcParam->iPaddingFlag && pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize > 0) {
      int32_t iPaddingNalSize = 0;
      pCtx->iEncoderError =  WritePadding (pCtx, pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize, iPaddingNalSize);
      WELS_VERIFY_RETURN_IFNEQ (pCtx->iEncoderError, ENC_RETURN_SUCCESS)

#if GOM_TRACE_FLAG
      WelsLog (pLogCtx, WELS_LOG_INFO, "[RC] dependency ID = %d,encoding_qp = %d Padding: %d", pCtx->uiDependencyId,
               pCtx->iGlobalQp,
               pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize);
#endif
      if (iPaddingNalSize <= 0)
        return ENC_RETURN_UNEXPECTED;

      pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingBitrateStat += pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize;

      pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize = 0;

      pLayerBsInfo->uiSpatialId         = 0;
      pLayerBsInfo->uiTemporalId        = 0;
      pLayerBsInfo->uiQualityId         = 0;
      pLayerBsInfo->uiLayerType         = NON_VIDEO_CODING_LAYER;
      pLayerBsInfo->iNalCount           = 1;
      pLayerBsInfo->pNalLengthInByte[0] = iPaddingNalSize;
      ++ pLayerBsInfo;
      ++ pCtx->pOut->iLayerBsIndex;
      pLayerBsInfo->pBsBuf           = pCtx->pFrameBs + pCtx->iPosBsBuffer;
      pLayerBsInfo->pNalLengthInByte = (pLayerBsInfo - 1)->pNalLengthInByte + 1;
      ++ iLayerNum;

      iFrameSize += iPaddingNalSize;
    }

    if ((pParam->sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE)
        && pSvcParam->bUseLoadBalancing
        && pSvcParam->iMultipleThreadIdc > 1 &&
        pSvcParam->iMultipleThreadIdc >= pParam->sSliceArgument.uiSliceNum) {
      CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer->pSliceEncCtx,
                             pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]);
#if defined(MT_DEBUG)
      TrackSliceComplexities (pCtx, iCurDid);
#endif//#if defined(MT_DEBUG)
    }

    pCtx->eLastNalPriority[iDidIdx] = eNalRefIdc;
    ++ iSpatialIdx;

    if (iCurDid + 1 < pSvcParam->iSpatialLayerNum) {
      //for next layer, note that iSpatialIdx has been ++ so it is pointer to next layer
      WelsSwapDqLayers (pCtx, (pSpatialIndexMap + iSpatialIdx)->iDid);
    }

    if (pCtx->pVpp->UpdateSpatialPictures (pCtx, pSvcParam, iCurTid, iDidIdx) != 0) {
      ForceCodingIDR (pCtx);
      WelsLog (pLogCtx, WELS_LOG_WARNING,
               "WelsEncoderEncodeExt(), Logic Error Found in Preprocess updating. ForceCodingIDR!");
      //the above is to set the next frame IDR
      pFbi->eFrameType = eFrameType;
      return ENC_RETURN_CORRECTED;
    }

    if (pSvcParam->bEnableLongTermReference && ((pCtx->pLtr[pCtx->uiDependencyId].bLTRMarkingFlag
        && (pCtx->pLtr[pCtx->uiDependencyId].iLTRMarkMode == LTR_DIRECT_MARK)) || eFrameType == videoFrameTypeIDR)) {
      pCtx->bRefOfCurTidIsLtr[iDidIdx][iCurTid] = true;
    }
  }//end of (iSpatialIdx/iSpatialNum)

  if (ENC_RETURN_CORRECTED == pCtx->iEncoderError) {
    pCtx->pVpp->UpdateSpatialPictures (pCtx, pSvcParam, iCurTid, (pSpatialIndexMap + iSpatialIdx)->iDid);
    ForceCodingIDR (pCtx);
    WelsLog (pLogCtx, WELS_LOG_WARNING, "WelsEncoderEncodeExt(), Logic Error Found in temporal level. ForceCodingIDR!");
    //the above is to set the next frame IDR
    pFbi->eFrameType = eFrameType;
    return ENC_RETURN_CORRECTED;
  }

#if defined(MT_DEBUG)
  TrackSliceConsumeTime (pCtx, iDidList, iSpatialNum);
#endif//MT_DEBUG

  if (pSvcParam->iMultipleThreadIdc > 1 && iDidList[0] == BASE_DEPENDENCY_ID
      && (pSvcParam->sSpatialLayers[0].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE)
      && pSvcParam->bUseLoadBalancing
      && pSvcParam->iMultipleThreadIdc >= pSvcParam->sSpatialLayers[0].sSliceArgument.uiSliceNum
      && ((pSvcParam->sSpatialLayers[iDidList[iSpatialNum - 1]].sSliceArgument.uiSliceMode == SM_FIXEDSLCNUM_SLICE))
      && pSvcParam->iMultipleThreadIdc >= pSvcParam->sSpatialLayers[iDidList[iSpatialNum -
          1]].sSliceArgument.uiSliceNum) {
    AdjustBaseLayer (pCtx);
  }

#ifdef ENABLE_FRAME_DUMP
  DumpRecFrame (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iSpatialLayerNum - 1].sRecFileName[0],
                pSvcParam->iSpatialLayerNum - 1, pCtx->bRecFlag, pCtx->pCurDqLayer); // pDecPic: final reconstruction output
  pCtx->bRecFlag = true;

#endif//ENABLE_FRAME_DUMP

  // to check number of layers / nals / slices dependencies
  if (iLayerNum > MAX_LAYER_NUM_OF_FRAME) {
    WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
             iLayerNum, MAX_LAYER_NUM_OF_FRAME);
    return 1;
  }

  ++ pCtx->iCodingIndex;
  pFbi->iLayerNum = iLayerNum;
  pFbi->iSubSeqId = GetSubSequenceId (pCtx, eFrameType);

  WelsLog (pLogCtx, WELS_LOG_DEBUG, "WelsEncoderEncodeExt() OutputInfo iLayerNum = %d,iSubSeqId = %d,iFrameSize = %d",
           iLayerNum,
           pFbi->iSubSeqId, iFrameSize);
  for (int32_t i = 0; i < iLayerNum; i++)
    WelsLog (pLogCtx, WELS_LOG_DEBUG,
             "WelsEncoderEncodeExt() OutputInfo iLayerId = %d,iNalType = %d,iNalCount = %d, first Nal Length=%d", i,
             pFbi->sLayerInfo[i].uiLayerType, pFbi->sLayerInfo[i].iNalCount, pFbi->sLayerInfo[i].pNalLengthInByte[0]);
  WelsEmms();

  pFbi->eFrameType = eFrameType;
  pFbi->iFrameSizeInBytes = iFrameSize;

#ifdef _DEBUG
  if (pFbi->iLayerNum > MAX_LAYER_NUM_OF_FRAME) {
    WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!",
             pFbi->iLayerNum, MAX_LAYER_NUM_OF_FRAME);
    return ENC_RETURN_UNEXPECTED;
  }

  int32_t iTotalNal = 0;
  for (int32_t k = 0; k < pFbi->iLayerNum; k++) {
    iTotalNal += pFbi->sLayerInfo[k].iNalCount;

    if ((pCtx->iActiveThreadsNum > 1) && (MAX_NAL_UNITS_IN_LAYER < pFbi->sLayerInfo[k].iNalCount)) {
      WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR,
               "WelsEncoderEncodeExt(), iCountNumNals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) under multi-thread(%d) NOT supported!",
               pFbi->sLayerInfo[k].iNalCount, MAX_NAL_UNITS_IN_LAYER), pCtx->iActiveThreadsNum;
      return ENC_RETURN_UNEXPECTED;
    }
  }

  if (iTotalNal > pCtx->pOut->iCountNals) {
    WelsLog (& pCtx->sLogCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iTotalNal(%d) > iCountNals(%d)!",
             iTotalNal, pCtx->pOut->iCountNals);
    return ENC_RETURN_UNEXPECTED;
  }
#endif

  return ENC_RETURN_SUCCESS;
}

/*!
 * \brief   Wels SVC encoder parameters adjustment
 *          SVC adjustment results in new requirement in memory blocks adjustment
 */
int32_t WelsEncoderParamAdjust (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pNewParam) {
  SWelsSvcCodingParam* pOldParam = NULL;
  int32_t iReturn = ENC_RETURN_SUCCESS;
  int8_t iIndexD = 0;
  bool bNeedReset = false;
  int16_t iSliceNum = 1; // number of slices used
  int32_t iCacheLineSize = 16; // on chip cache line size in byte
  uint32_t uiCpuFeatureFlags = 0;

  if (NULL == ppCtx || NULL == *ppCtx || NULL == pNewParam) return 1;

  /* Check validation in new parameters */
  iReturn = ParamValidationExt (& (*ppCtx)->sLogCtx, pNewParam);
  if (iReturn != ENC_RETURN_SUCCESS) return iReturn;

  iReturn = GetMultipleThreadIdc (& (*ppCtx)->sLogCtx, pNewParam, iSliceNum, iCacheLineSize, uiCpuFeatureFlags);
  if (iReturn != ENC_RETURN_SUCCESS) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR, "WelsEncoderParamAdjust(), GetMultipleThreadIdc failed return %d.",
             iReturn);
    return iReturn;
  }

  pOldParam = (*ppCtx)->pSvcParam;

  if (pOldParam->iUsageType != pNewParam->iUsageType) {
    WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_ERROR,
             "WelsEncoderParamAdjust(), does not expect in-middle change of iUsgaeType from %d to %d", pOldParam->iUsageType,
             pNewParam->iUsageType);
    return ENC_RETURN_UNSUPPORTED_PARA;
  }

  /* Decide whether need reset for IDR frame based on adjusting prarameters changed */
  /* Temporal levels, spatial settings and/ or quality settings changed need update parameter sets related. */
  bNeedReset = (pOldParam == NULL) ||
                (pOldParam->bSimulcastAVC != pNewParam->bSimulcastAVC) ||
                (pOldParam->iSpatialLayerNum != pNewParam->iSpatialLayerNum) ||
                (pOldParam->iPicWidth != pNewParam->iPicWidth
                 || pOldParam->iPicHeight != pNewParam->iPicHeight) ||
                (pOldParam->SUsedPicRect.iWidth != pNewParam->SUsedPicRect.iWidth
                 || pOldParam->SUsedPicRect.iHeight != pNewParam->SUsedPicRect.iHeight) ||
                (pOldParam->bEnableLongTermReference != pNewParam->bEnableLongTermReference) ||
                (pOldParam->iLTRRefNum != pNewParam->iLTRRefNum) ||
                (pOldParam->iMultipleThreadIdc != pNewParam->iMultipleThreadIdc) ||
                (pOldParam->bEnableBackgroundDetection != pNewParam->bEnableBackgroundDetection) ||
                (pOldParam->bEnableAdaptiveQuant != pNewParam->bEnableAdaptiveQuant) ||
                (pOldParam->eSpsPpsIdStrategy != pNewParam->eSpsPpsIdStrategy);
  if (pNewParam->iMaxNumRefFrame > pOldParam->iMaxNumRefFrame) {
    bNeedReset = true;
  }

  if (!bNeedReset) { // Check its picture resolutions/quality settings respectively in each dependency layer
    iIndexD = 0;
    assert (pOldParam->iSpatialLayerNum == pNewParam->iSpatialLayerNum);
    do {
      const SSpatialLayerInternal* kpOldDlp     = &pOldParam->sDependencyLayers[iIndexD];
      const SSpatialLayerInternal* kpNewDlp     = &pNewParam->sDependencyLayers[iIndexD];
      float fT1 = .0f;
      float fT2 = .0f;

      // check frame size settings
      if (pOldParam->sSpatialLayers[iIndexD].iVideoWidth != pNewParam->sSpatialLayers[iIndexD].iVideoWidth ||
          pOldParam->sSpatialLayers[iIndexD].iVideoHeight != pNewParam->sSpatialLayers[iIndexD].iVideoHeight ||
          kpOldDlp->iActualWidth != kpNewDlp->iActualWidth ||
          kpOldDlp->iActualHeight != kpNewDlp->iActualHeight) {
        bNeedReset = true;
        break;
      }

      if (pOldParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceMode != pNewParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceMode
          ||
          pOldParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceNum !=
          pNewParam->sSpatialLayers[iIndexD].sSliceArgument.uiSliceNum) {
        bNeedReset = true;
        break;
      }

      // check frame rate
      // we can not check whether corresponding fFrameRate is equal or not,
      // only need to check d_max/d_min and max_fr/d_max whether it is equal or not
      if (kpNewDlp->fInputFrameRate > EPSN && kpOldDlp->fInputFrameRate > EPSN)
        fT1 = kpNewDlp->fOutputFrameRate / kpNewDlp->fInputFrameRate - kpOldDlp->fOutputFrameRate / kpOldDlp->fInputFrameRate;
      if (kpNewDlp->fOutputFrameRate > EPSN && kpOldDlp->fOutputFrameRate > EPSN)
        fT2 = pNewParam->fMaxFrameRate / kpNewDlp->fOutputFrameRate - pOldParam->fMaxFrameRate / kpOldDlp->fOutputFrameRate;
      if (fT1 > EPSN || fT1 < -EPSN || fT2 > EPSN || fT2 < -EPSN) {
        bNeedReset = true;
        break;
      }
      ++ iIndexD;
    } while (iIndexD < pOldParam->iSpatialLayerNum);
  }

  if (bNeedReset) {
    SLogContext sLogCtx = (*ppCtx)->sLogCtx;

    int32_t iOldSpsPpsIdStrategy = pOldParam->eSpsPpsIdStrategy;
    SParaSetOffsetVariable sTmpPsoVariable[PARA_SET_TYPE];
    int32_t  iTmpPpsIdList[MAX_DQ_LAYER_NUM * MAX_PPS_COUNT];
    uint16_t uiTmpIdrPicId = (*ppCtx)->uiIdrPicId;//this is for LTR!

    SEncoderStatistics sTempEncoderStatistics = (*ppCtx)->sEncoderStatistics;

    SExistingParasetList sExistingParasetList;
    SExistingParasetList* pExistingParasetList = NULL;

    if ((CONSTANT_ID != iOldSpsPpsIdStrategy) && (CONSTANT_ID != pNewParam->eSpsPpsIdStrategy)) {
      for (int32_t k = 0; k < PARA_SET_TYPE; k++) {
        memset (((*ppCtx)->sPSOVector.sParaSetOffsetVariable[k].bUsedParaSetIdInBs), 0, MAX_PPS_COUNT * sizeof (bool));
      }
      memcpy (sTmpPsoVariable, (*ppCtx)->sPSOVector.sParaSetOffsetVariable,
              (PARA_SET_TYPE)*sizeof (SParaSetOffsetVariable)); // confirmed_safe_unsafe_usage

      if ((SPS_LISTING & iOldSpsPpsIdStrategy)
          && (SPS_LISTING & pNewParam->eSpsPpsIdStrategy)) {
        pExistingParasetList = &sExistingParasetList;
        sExistingParasetList.uiInUseSpsNum = (*ppCtx)->sPSOVector.uiInUseSpsNum;
        memcpy (sExistingParasetList.sSps, (*ppCtx)->pSpsArray, MAX_SPS_COUNT * sizeof (SWelsSPS));
        if (NULL != (*ppCtx)->pSubsetArray) {
          sExistingParasetList.uiInUseSubsetSpsNum = (*ppCtx)->sPSOVector.uiInUseSubsetSpsNum;
          memcpy (sExistingParasetList.sSubsetSps, (*ppCtx)->pSubsetArray, MAX_SPS_COUNT * sizeof (SSubsetSps));
        } else {
          sExistingParasetList.uiInUseSubsetSpsNum = 0;
        }
      }

      if ((SPS_PPS_LISTING == iOldSpsPpsIdStrategy)
          && (SPS_PPS_LISTING == pNewParam->eSpsPpsIdStrategy)) {
        pExistingParasetList = &sExistingParasetList;
        sExistingParasetList.uiInUseSpsNum = (*ppCtx)->sPSOVector.uiInUseSpsNum;
        sExistingParasetList.uiInUsePpsNum = (*ppCtx)->sPSOVector.uiInUsePpsNum;
        memcpy (sExistingParasetList.sSps, (*ppCtx)->pSpsArray, MAX_SPS_COUNT * sizeof (SWelsSPS));
        memcpy (sExistingParasetList.sPps, (*ppCtx)->pPps, MAX_PPS_COUNT * sizeof (SWelsPPS));

        if (NULL != (*ppCtx)->pSubsetArray) {
          sExistingParasetList.uiInUseSubsetSpsNum = (*ppCtx)->sPSOVector.uiInUseSubsetSpsNum;
          memcpy (sExistingParasetList.sSubsetSps, (*ppCtx)->pSubsetArray, MAX_SPS_COUNT * sizeof (SSubsetSps));
        } else {
          sExistingParasetList.uiInUseSubsetSpsNum = 0;
        }

        memcpy (iTmpPpsIdList, ((*ppCtx)->sPSOVector.iPpsIdList), MAX_DQ_LAYER_NUM * MAX_PPS_COUNT * sizeof (int32_t));
      }
    }


    WelsUninitEncoderExt (ppCtx);

    /* Update new parameters */
    if (WelsInitEncoderExt (ppCtx, pNewParam, &sLogCtx, pExistingParasetList))
      return 1;

    // reset the scaled spatial picture size
    (*ppCtx)->pVpp->WelsPreprocessReset (*ppCtx);
    //if WelsInitEncoderExt succeed

    //for LTR
    (*ppCtx)->uiIdrPicId = uiTmpIdrPicId;

    //for sEncoderStatistics
    (*ppCtx)->sEncoderStatistics = sTempEncoderStatistics;

    //load back the needed structure for eSpsPpsIdStrategy
    if ((CONSTANT_ID != iOldSpsPpsIdStrategy) && (CONSTANT_ID != pNewParam->eSpsPpsIdStrategy)) {
      memcpy ((*ppCtx)->sPSOVector.sParaSetOffsetVariable, sTmpPsoVariable,
              (PARA_SET_TYPE)*sizeof (SParaSetOffsetVariable)); // confirmed_safe_unsafe_usage
    }

    if ((SPS_PPS_LISTING == iOldSpsPpsIdStrategy)
        && (SPS_PPS_LISTING == pNewParam->eSpsPpsIdStrategy)) {
      memcpy (((*ppCtx)->sPSOVector.iPpsIdList), iTmpPpsIdList, MAX_DQ_LAYER_NUM * MAX_PPS_COUNT * sizeof (int32_t));
    }
  } else {
    /* maybe adjustment introduced in bitrate or little settings adjustment and so on.. */
    pNewParam->iNumRefFrame                     = WELS_CLIP3 (pNewParam->iNumRefFrame, MIN_REF_PIC_COUNT,
                                            (pNewParam->iUsageType == CAMERA_VIDEO_REAL_TIME ? MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA :
                                                MAX_REFERENCE_PICTURE_COUNT_NUM_SCREEN));
    pNewParam->iLoopFilterDisableIdc            = WELS_CLIP3 (pNewParam->iLoopFilterDisableIdc, 0, 6);
    pNewParam->iLoopFilterAlphaC0Offset         = WELS_CLIP3 (pNewParam->iLoopFilterAlphaC0Offset, -6, 6);
    pNewParam->iLoopFilterBetaOffset            = WELS_CLIP3 (pNewParam->iLoopFilterBetaOffset, -6, 6);
    pNewParam->fMaxFrameRate                    = WELS_CLIP3 (pNewParam->fMaxFrameRate, MIN_FRAME_RATE, MAX_FRAME_RATE);

    // we can not use direct struct based memcpy due some fields need keep unchanged as before
    pOldParam->fMaxFrameRate    = pNewParam->fMaxFrameRate;             // maximal frame rate [Hz / fps]
    pOldParam->iComplexityMode  = pNewParam->iComplexityMode;                   // color space of input sequence
    pOldParam->uiIntraPeriod    = pNewParam->uiIntraPeriod;             // intra period (multiple of GOP size as desired)
    pOldParam->eSpsPpsIdStrategy = pNewParam->eSpsPpsIdStrategy;
    pOldParam->bPrefixNalAddingCtrl = pNewParam->bPrefixNalAddingCtrl;
    pOldParam->iNumRefFrame     = pNewParam->iNumRefFrame;              // number of reference frame used
    pOldParam->uiGopSize = pNewParam->uiGopSize;
    if (pOldParam->iTemporalLayerNum != pNewParam->iTemporalLayerNum) {
      pOldParam->iTemporalLayerNum = pNewParam->iTemporalLayerNum;
      (*ppCtx)->iCodingIndex = 0;
    }
    pOldParam->iDecompStages = pNewParam->iDecompStages;
    /* denoise control */
    pOldParam->bEnableDenoise = pNewParam->bEnableDenoise;

    /* background detection control */
    pOldParam->bEnableBackgroundDetection = pNewParam->bEnableBackgroundDetection;

    /* adaptive quantization control */
    pOldParam->bEnableAdaptiveQuant = pNewParam->bEnableAdaptiveQuant;

    /* int32_t term reference control */
    pOldParam->bEnableLongTermReference = pNewParam->bEnableLongTermReference;
    pOldParam->iLtrMarkPeriod = pNewParam->iLtrMarkPeriod;

    // keep below values unchanged as before
    pOldParam->bEnableSSEI              = pNewParam->bEnableSSEI;
    pOldParam->bSimulcastAVC            = pNewParam->bSimulcastAVC;
    pOldParam->bEnableFrameCroppingFlag = pNewParam->bEnableFrameCroppingFlag;  // enable frame cropping flag

    /* Motion search */

    /* Deblocking loop filter */
    pOldParam->iLoopFilterDisableIdc    = pNewParam->iLoopFilterDisableIdc;     // 0: on, 1: off, 2: on except for slice boundaries
    pOldParam->iLoopFilterAlphaC0Offset = pNewParam->iLoopFilterAlphaC0Offset;// AlphaOffset: valid range [-6, 6], default 0
    pOldParam->iLoopFilterBetaOffset    = pNewParam->iLoopFilterBetaOffset;     // BetaOffset:  valid range [-6, 6], default 0

    /* Rate Control */
    pOldParam->iRCMode          = pNewParam->iRCMode;
    pOldParam->iTargetBitrate   = pNewParam->iTargetBitrate;                    // overall target bitrate introduced in RC module
    pOldParam->iPaddingFlag     = pNewParam->iPaddingFlag;

    /* Layer definition */
    pOldParam->bPrefixNalAddingCtrl = pNewParam->bPrefixNalAddingCtrl;

    // d
    iIndexD = 0;
    do {
      SSpatialLayerInternal* pOldDlpInternal    = &pOldParam->sDependencyLayers[iIndexD];
      SSpatialLayerInternal* pNewDlpInternal    = &pNewParam->sDependencyLayers[iIndexD];

      SSpatialLayerConfig* pOldDlp      = &pOldParam->sSpatialLayers[iIndexD];
      SSpatialLayerConfig* pNewDlp      = &pNewParam->sSpatialLayers[iIndexD];

      pOldDlpInternal->fInputFrameRate  = pNewDlpInternal->fInputFrameRate;     // input frame rate
      pOldDlpInternal->fOutputFrameRate = pNewDlpInternal->fOutputFrameRate;    // output frame rate
      pOldDlp->iSpatialBitrate          = pNewDlp->iSpatialBitrate;

      pOldDlp->uiProfileIdc             = pNewDlp->uiProfileIdc;                        // value of profile IDC (0 for auto-detection)
      pOldDlp->iDLayerQp                = pNewDlp->iDLayerQp;

      /* Derived variants below */
      pOldDlpInternal->iTemporalResolution      = pNewDlpInternal->iTemporalResolution;
      pOldDlpInternal->iDecompositionStages     = pNewDlpInternal->iDecompositionStages;

      memcpy (pOldDlpInternal->uiCodingIdx2TemporalId, pNewDlpInternal->uiCodingIdx2TemporalId,
              sizeof (pOldDlpInternal->uiCodingIdx2TemporalId)); // confirmed_safe_unsafe_usage

      ++ iIndexD;
    } while (iIndexD < pOldParam->iSpatialLayerNum);
  }

  /* Any else initialization/reset for rate control here? */

  return 0;
}

int32_t WelsEncoderApplyLTR (SLogContext* pLogCtx, sWelsEncCtx** ppCtx, SLTRConfig* pLTRValue) {
  SWelsSvcCodingParam sConfig;
  int32_t iNumRefFrame = 1;
  int32_t iRet = 0;
  memcpy (&sConfig, (*ppCtx)->pSvcParam, sizeof (SWelsSvcCodingParam));
  sConfig.bEnableLongTermReference = pLTRValue->bEnableLongTermReference;
  sConfig.iLTRRefNum = pLTRValue->iLTRRefNum;
  int32_t uiGopSize = 1 << (sConfig.iTemporalLayerNum - 1);
  if (sConfig.iUsageType == SCREEN_CONTENT_REAL_TIME) {
    if (sConfig.bEnableLongTermReference) {
      sConfig.iLTRRefNum = LONG_TERM_REF_NUM_SCREEN;//WELS_CLIP3 (sConfig.iLTRRefNum, 1, LONG_TERM_REF_NUM_SCREEN);
      iNumRefFrame = WELS_MAX (1, WELS_LOG2 (uiGopSize)) + sConfig.iLTRRefNum;
    } else {
      sConfig.iLTRRefNum = 0;
      iNumRefFrame = WELS_MAX (1, uiGopSize >> 1);
    }
  } else {
    if (sConfig.bEnableLongTermReference) {
      sConfig.iLTRRefNum = LONG_TERM_REF_NUM;//WELS_CLIP3 (sConfig.iLTRRefNum, 1, LONG_TERM_REF_NUM);
    } else {
      sConfig.iLTRRefNum = 0;
    }
    iNumRefFrame = ((uiGopSize >> 1) > 1) ? ((uiGopSize >> 1) + sConfig.iLTRRefNum) : (MIN_REF_PIC_COUNT +
                      sConfig.iLTRRefNum);
    iNumRefFrame = WELS_CLIP3 (iNumRefFrame, MIN_REF_PIC_COUNT, MAX_REFERENCE_PICTURE_COUNT_NUM_CAMERA);

  }
  if (iNumRefFrame > sConfig.iMaxNumRefFrame) {
    WelsLog (pLogCtx, WELS_LOG_WARNING,
             " CWelsH264SVCEncoder::SetOption LTR flag = %d and number = %d: Required number of reference increased to %d and iMaxNumRefFrame is adjusted (from %d)",
             sConfig.bEnableLongTermReference, sConfig.iLTRRefNum, iNumRefFrame, sConfig.iMaxNumRefFrame);
    sConfig.iMaxNumRefFrame = iNumRefFrame;
  }

  if (sConfig.iNumRefFrame < iNumRefFrame) {
    WelsLog (pLogCtx, WELS_LOG_WARNING,
             " CWelsH264SVCEncoder::SetOption LTR flag = %d and number = %d, Required number of reference increased from Old = %d to New = %d because of LTR setting",
             sConfig.bEnableLongTermReference, sConfig.iLTRRefNum, sConfig.iNumRefFrame, iNumRefFrame);
    sConfig.iNumRefFrame = iNumRefFrame;
  }
  WelsLog (pLogCtx, WELS_LOG_INFO, "CWelsH264SVCEncoder::SetOption enable LTR = %d,ltrnum = %d",
           sConfig.bEnableLongTermReference, sConfig.iLTRRefNum);
  iRet = WelsEncoderParamAdjust (ppCtx, &sConfig);
  return iRet;
}
int32_t DynSliceRealloc (sWelsEncCtx* pCtx,
                         SFrameBSInfo* pFrameBsInfo,
                         SLayerBSInfo* pLayerBsInfo) {
  CMemoryAlign* pMA = pCtx->pMemAlign;
  SDqLayer* pCurLayer = pCtx->pCurDqLayer;

  int32_t iCountNals = pCtx->pOut->iCountNals;
  int32_t iMaxSliceNumOld = pCurLayer->pSliceEncCtx->iMaxSliceNumConstraint;
  int32_t iMaxSliceNum = iMaxSliceNumOld;
  iCountNals += iMaxSliceNum * (pCtx->pSvcParam->iSpatialLayerNum + pCtx->bNeedPrefixNalFlag);
  iMaxSliceNum *= SLICE_NUM_EXPAND_COEF;

  SWelsNalRaw* pNalList = (SWelsNalRaw*)pMA->WelsMalloc (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList");
  if (NULL == pNalList) {
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalList is NULL");
    return ENC_RETURN_MEMALLOCERR;
  }
  memcpy (pNalList, pCtx->pOut->sNalList, sizeof (SWelsNalRaw) * pCtx->pOut->iCountNals);
  pMA->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList");
  pCtx->pOut->sNalList = pNalList;

  int32_t* pNalLen = (int32_t*)pMA->WelsMalloc (iCountNals * sizeof (int32_t), "pOut->pNalLen");
  if (NULL == pNalLen) {
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pNalLen is NULL");
    return ENC_RETURN_MEMALLOCERR;
  }
  memcpy (pNalLen, pCtx->pOut->pNalLen, sizeof (int32_t) * pCtx->pOut->iCountNals);
  pMA->WelsFree (pCtx->pOut->pNalLen, "pOut->pNalLen");
  pCtx->pOut->pNalLen = pNalLen;

  pCtx->pOut->iCountNals = iCountNals;
  SLayerBSInfo* pLBI1, *pLBI2;
  pLBI1 = &pFrameBsInfo->sLayerInfo[0];
  pLBI1->pNalLengthInByte = pCtx->pOut->pNalLen;
  while (pLBI1 != pLayerBsInfo) {
    pLBI2 = pLBI1;
    ++ pLBI1;
    pLBI1->pNalLengthInByte = pLBI2->pNalLengthInByte + pLBI2->iNalCount;
  }

  SSlice* pSlice = (SSlice*)pMA->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "Slice");
  if (NULL == pSlice) {
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pSlice is NULL");
    return ENC_RETURN_MEMALLOCERR;
  }
  memcpy (pSlice, pCurLayer->sLayerInfo.pSliceInLayer, sizeof (SSlice) * iMaxSliceNumOld);
  int32_t uiSliceIdx;
  uiSliceIdx = iMaxSliceNumOld;
  SSlice* pBaseSlice = &pCurLayer->sLayerInfo.pSliceInLayer[0];
  SSliceHeaderExt* pBaseSHExt = &pBaseSlice->sSliceHeaderExt;
  SSlice* pSliceIdx = &pSlice[uiSliceIdx];
  while (uiSliceIdx < iMaxSliceNum) {
    SSliceHeaderExt* pSHExt = &pSliceIdx->sSliceHeaderExt;
    pSliceIdx->uiSliceIdx = uiSliceIdx;
    if (pCtx->pSvcParam->iMultipleThreadIdc > 1)
      pSliceIdx->pSliceBsa = &pCtx->pSliceBs[uiSliceIdx].sBsWrite;
    else
      pSliceIdx->pSliceBsa = &pCtx->pOut->sBsWrite;
    if (AllocMbCacheAligned (&pSliceIdx->sMbCacheInfo, pMA)) {
      WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
               "CWelsH264SVCEncoder::DynSliceRealloc: realloc MbCache not successful at slice_idx=%d (max-slice=%d)",
               uiSliceIdx, iMaxSliceNum);
      return ENC_RETURN_MEMALLOCERR;
    }

    pSliceIdx->bSliceHeaderExtFlag = pBaseSlice->bSliceHeaderExtFlag;
    pSHExt->sSliceHeader.iPpsId = pBaseSHExt->sSliceHeader.iPpsId;
    pSHExt->sSliceHeader.pPps = pBaseSHExt->sSliceHeader.pPps;
    pSHExt->sSliceHeader.iSpsId = pBaseSHExt->sSliceHeader.iSpsId;
    pSHExt->sSliceHeader.pSps = pBaseSHExt->sSliceHeader.pSps;
    pSHExt->sSliceHeader.uiRefCount = pCtx->iNumRef0;
    memcpy (&pSHExt->sSliceHeader.sRefMarking, &pBaseSHExt->sSliceHeader.sRefMarking, sizeof (SRefPicMarking));
    memcpy (&pSHExt->sSliceHeader.sRefReordering, &pBaseSHExt->sSliceHeader.sRefReordering,
            sizeof (SRefPicListReorderSyntax));

    pSliceIdx++;
    uiSliceIdx++;
  }
  pMA->WelsFree (pCurLayer->sLayerInfo.pSliceInLayer, "Slice");
  pCurLayer->sLayerInfo.pSliceInLayer = pSlice;

  int32_t* pFirstMbInSlice = (int32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int32_t), "pSliceSeg->pFirstMbInSlice");
  if (NULL == pFirstMbInSlice) {
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR, "CWelsH264SVCEncoder::DynSliceRealloc: pFirstMbInSlice is NULL");
    return ENC_RETURN_MEMALLOCERR;
  }
  memset (pFirstMbInSlice, 0, sizeof (int32_t) * iMaxSliceNum);
  memcpy (pFirstMbInSlice, pCurLayer->pSliceEncCtx->pFirstMbInSlice, sizeof (int32_t) * iMaxSliceNumOld);
  pMA->WelsFree (pCurLayer->pSliceEncCtx->pFirstMbInSlice, "pSliceSeg->pFirstMbInSlice");
  pCurLayer->pSliceEncCtx->pFirstMbInSlice = pFirstMbInSlice;

  int32_t* pCountMbNumInSlice = (int32_t*)pMA->WelsMalloc (iMaxSliceNum * sizeof (int32_t),
                                "pSliceSeg->pCountMbNumInSlice");
  if (NULL == pCountMbNumInSlice) {
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
             "CWelsH264SVCEncoder::DynSliceRealloc: realloc pCountMbNumInSlice not successful");
    return ENC_RETURN_MEMALLOCERR;
  }
  memcpy (pCountMbNumInSlice, pCurLayer->pSliceEncCtx->pCountMbNumInSlice, sizeof (int32_t) * iMaxSliceNumOld);
  uiSliceIdx = iMaxSliceNumOld;
  while (uiSliceIdx < iMaxSliceNum) {
    pCountMbNumInSlice[uiSliceIdx] = pCurLayer->pSliceEncCtx->iMbNumInFrame;
    uiSliceIdx++;
  }
  pMA->WelsFree (pCurLayer->pSliceEncCtx->pCountMbNumInSlice, "pSliceSeg->pCountMbNumInSlice");
  pCurLayer->pSliceEncCtx->pCountMbNumInSlice = pCountMbNumInSlice;

  //deal with rate control variables
  const int32_t kiCurDid = pCtx->uiDependencyId;
  SRCSlicing* pSlcingOverRc = (SRCSlicing*)pMA->WelsMalloc (iMaxSliceNum * sizeof (SRCSlicing), "SlicingOverRC");
  if (NULL == pSlcingOverRc) {
    WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
             "CWelsH264SVCEncoder::DynSliceRealloc: realloc pSlcingOverRc not successful");
    return ENC_RETURN_MEMALLOCERR;
  }
  memcpy (pSlcingOverRc, pCtx->pWelsSvcRc[kiCurDid].pSlicingOverRc, sizeof (SRCSlicing) * iMaxSliceNumOld);
  uiSliceIdx = iMaxSliceNumOld;
  SRCSlicing* pSORC = &pSlcingOverRc[uiSliceIdx];
  const int32_t kiBitsPerMb = WELS_DIV_ROUND (pCtx->pWelsSvcRc[kiCurDid].iTargetBits * INT_MULTIPLY,
                                pCtx->pWelsSvcRc[kiCurDid].iNumberMbFrame);
  while (uiSliceIdx < iMaxSliceNum) {
    pSORC->iComplexityIndexSlice = 0;
    pSORC->iCalculatedQpSlice = pCtx->iGlobalQp;
    pSORC->iTotalQpSlice    = 0;
    pSORC->iTotalMbSlice    = 0;
    pSORC->iTargetBitsSlice = WELS_DIV_ROUND (kiBitsPerMb * pCurLayer->pSliceEncCtx->pCountMbNumInSlice[uiSliceIdx],
                              INT_MULTIPLY);
    pSORC->iFrameBitsSlice  = 0;
    pSORC->iGomBitsSlice    = 0;
    pSORC ++;
    uiSliceIdx ++;
  }
  pMA->WelsFree (pCtx->pWelsSvcRc[kiCurDid].pSlicingOverRc, "SlicingOverRC");
  pCtx->pWelsSvcRc[kiCurDid].pSlicingOverRc = pSlcingOverRc;

  if (pCtx->iMaxSliceCount < iMaxSliceNum)
    pCtx->iMaxSliceCount = iMaxSliceNum;
  pCurLayer->pSliceEncCtx->iMaxSliceNumConstraint = iMaxSliceNum;
  return ENC_RETURN_SUCCESS;
}

int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx,
                                 SFrameBSInfo* pFrameBSInfo,
                                 SLayerBSInfo* pLayerBsInfo,
                                 int32_t* pNalIdxInLayer,
                                 int32_t* pLayerSize,
                                 int32_t iFirstMbInPartition,   // first mb inclusive in partition
                                 int32_t iEndMbInPartition,     // end mb exclusive in partition
                                 int32_t iStartSliceIdx
                                ) {

  SDqLayer* pCurLayer                   = pCtx->pCurDqLayer;
  SSliceCtx* pSliceCtx                  = pCurLayer->pSliceEncCtx;
  int32_t iNalIdxInLayer                = *pNalIdxInLayer;
  int32_t iSliceIdx                     = iStartSliceIdx;
  const int32_t kiSliceStep             = pCtx->iActiveThreadsNum;
  const int32_t kiPartitionId           = iStartSliceIdx % kiSliceStep;
  int32_t iPartitionBsSize              = 0;
  int32_t iAnyMbLeftInPartition         = iEndMbInPartition - iFirstMbInPartition;
  const EWelsNalUnitType keNalType      = pCtx->eNalType;
  const EWelsNalRefIdc keNalRefIdc      = pCtx->eNalPriority;
  const bool kbNeedPrefix               = pCtx->bNeedPrefixNalFlag;
  const int32_t kiSliceIdxStep          = pCtx->iActiveThreadsNum;
  int32_t iReturn = ENC_RETURN_SUCCESS;

  //init
  {
    pSliceCtx->pFirstMbInSlice[iSliceIdx]               = iFirstMbInPartition;
    pCurLayer->pNumSliceCodedOfPartition[kiPartitionId] = 1;    // one slice per partition intialized, dynamic slicing inside
    pCurLayer->pLastMbIdxOfPartition[kiPartitionId]     = iEndMbInPartition - 1;
  }
  pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] = 0;

  while (iAnyMbLeftInPartition > 0) {
    int32_t iSliceSize      = 0;
    int32_t iPayloadSize    = 0;

    if (iSliceIdx >= (pSliceCtx->iMaxSliceNumConstraint - kiSliceIdxStep)) { // insufficient memory in pSliceInLayer[]
      if (pCtx->iActiveThreadsNum == 1) {
        //only single thread support re-alloc now
        if (DynSliceRealloc (pCtx, pFrameBSInfo, pLayerBsInfo)) {
          WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
                   "CWelsH264SVCEncoder::WelsCodeOnePicPartition: DynSliceRealloc not successful");
          return ENC_RETURN_MEMALLOCERR;
        }
      } else if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) {
        WelsLog (& (pCtx->sLogCtx), WELS_LOG_ERROR,
                 "CWelsH264SVCEncoder::WelsCodeOnePicPartition: iSliceIdx(%d) over iMaxSliceNumConstraint(%d)", iSliceIdx,
                 pSliceCtx->iMaxSliceNumConstraint);
        return ENC_RETURN_MEMALLOCERR;
      }
    }

    if (kbNeedPrefix) {
      iReturn = AddPrefixNal (pCtx, pLayerBsInfo, &pLayerBsInfo->pNalLengthInByte[0], &iNalIdxInLayer, keNalType, keNalRefIdc,
                              iPayloadSize);
      WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
      iPartitionBsSize += iPayloadSize;
    }

    WelsLoadNal (pCtx->pOut, keNalType, keNalRefIdc);
    iReturn = WelsCodeOneSlice (pCtx, iSliceIdx, keNalType);
    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
    WelsUnloadNal (pCtx->pOut);

    iReturn = WelsEncodeNal (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1],
                             &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt,
                             pCtx->iFrameBsSize - pCtx->iPosBsBuffer,
                             pCtx->pFrameBs + pCtx->iPosBsBuffer,
                             &pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer]);
    WELS_VERIFY_RETURN_IFNEQ (iReturn, ENC_RETURN_SUCCESS)
    iSliceSize = pLayerBsInfo->pNalLengthInByte[iNalIdxInLayer];

    pCtx->iPosBsBuffer  += iSliceSize;
    iPartitionBsSize    += iSliceSize;

#if defined(SLICE_INFO_OUTPUT)
    fprintf (stderr,
             "@slice=%-6d sliceType:%c idc:%d size:%-6d\n",
             iSliceIdx,
             (pCtx->eSliceType == P_SLICE ? 'P' : 'I'),
             eNalRefIdc,
             iSliceSize);
#endif//SLICE_INFO_OUTPUT

    ++ iNalIdxInLayer;
    iSliceIdx += kiSliceStep; //if uiSliceIdx is not continuous
    iAnyMbLeftInPartition = iEndMbInPartition - (1 + pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId]);
  }

  *pLayerSize           = iPartitionBsSize;
  *pNalIdxInLayer       = iNalIdxInLayer;

  // slice based packing???
  pLayerBsInfo->uiLayerType     = VIDEO_CODING_LAYER;
  pLayerBsInfo->uiSpatialId     = pCtx->uiDependencyId;
  pLayerBsInfo->uiTemporalId    = pCtx->uiTemporalId;
  pLayerBsInfo->uiQualityId     = 0;
  pLayerBsInfo->iNalCount       = iNalIdxInLayer;

  return ENC_RETURN_SUCCESS;
}
} // namespace WelsEnc