shithub: openh264

Download patch

ref: 40e1a69faee04abfd8e55cbe4b9bbd6766728335
parent: 33bb96f60498571e840fd83f817088b1826bb5dd
parent: a7a5b7b0f410588e11fa1cd6d53c4a7d8705ddae
author: sijchen <sijchen@cisco.com>
date: Tue Mar 22 12:20:37 EDT 2016

Merge pull request #2421 from shihuade/MultiThread_V5.2_Pull_V2

refactor for slice buffer init/allocate/free

--- a/codec/encoder/core/inc/mt_defs.h
+++ b/codec/encoder/core/inc/mt_defs.h
@@ -90,11 +90,6 @@
 bool                            bThreadBsBufferUsage[MAX_THREADS_NUM];
 WELS_MUTEX                      mutexThreadBsBufferUsage;
 
-SSlice*                         pSliceInThread[MAX_THREADS_NUM]; //slice buffer
-int32_t*                        piSliceIndexInThread[MAX_THREADS_NUM];
-int32_t                         iMaxSliceNumInThread[MAX_THREADS_NUM];
-int32_t                         iEncodedSliceNumInThread[MAX_THREADS_NUM];
-
 } SSliceThreading;
 
 #endif//MULTIPLE_THREADING_DEFINES_H__
--- a/codec/encoder/core/inc/svc_enc_frame.h
+++ b/codec/encoder/core/inc/svc_enc_frame.h
@@ -68,10 +68,17 @@
 int32_t iHighFreMbCount;
 } SFeatureSearchPreparation; //maintain only one
 
+typedef struct TagSliceThreadInfo { // per-thread slice buffers of one DQ layer, each array indexed by thread
+SSlice*                 pSliceInThread[MAX_THREADS_NUM];// slice list of each thread for multi-threading,
+                                                        // not meant to be allocated when multi-threading is off
+int32_t                 iMaxSliceNumInThread[MAX_THREADS_NUM]; // number of slices in pSliceInThread[i]
+int32_t                 iEncodedSliceNumInThread[MAX_THREADS_NUM]; // set to 0 at init; presumably slices coded so far by thread i
+}SSliceThreadInfo;
+
 typedef struct TagLayerInfo {
 SNalUnitHeaderExt       sNalHeaderExt;
-SSlice*
-pSliceInLayer;// Here SSlice identify to Frame on concept, [iSliceIndex], need memory block external side       for MT
+SSlice*                 pSliceInLayer;  // Here SSlice identify to Frame on concept, [iSliceIndex],
+                                        // may need to extend the list size for sliceMode=SM_SIZELIMITED_SLICE
 SSubsetSps*             pSubsetSpsP;    // current pSubsetSps used, memory alloc in external
 SWelsSPS*               pSpsP;          // current pSps based avc used, memory alloc in external
 SWelsPPS*               pPpsP;          // current pPps used
@@ -79,6 +86,8 @@
 /* Layer Representation */
 struct TagDqLayer {
 SLayerInfo              sLayerInfo;
+SSliceThreadInfo        sSliceThreadInfo;
+SSlice**                ppSliceInLayer;
 SSliceCtx               sSliceEncCtx;   // current slice context
 uint8_t*                pCsData[3];     // pointer to reconstructed picture pData
 int32_t                 iCsStride[3];   // Cs stride
--- a/codec/encoder/core/inc/svc_encode_slice.h
+++ b/codec/encoder/core/inc/svc_encode_slice.h
@@ -80,6 +80,30 @@
 int32_t WelsISliceMdEnc (sWelsEncCtx* pEncCtx, SSlice* pSlice);         // for intra non-dynamic slice
 int32_t WelsISliceMdEncDynamic (sWelsEncCtx* pEncCtx, SSlice* pSlice);  // for intra dynamic slice
 
+//slice buffer init, allocate and free process
+int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa);
+void FreeMbCache (SMbCache* pMbCache, CMemoryAlign* pMa);
+
+int32_t InitSliceMBInfo (SSliceArgument* pSliceArgument,
+                         SSlice* pSlice,
+                         const int32_t kiMBWidth,
+                         const int32_t kiMBHeight,
+                         CMemoryAlign* pMa);
+
+int32_t AllocateSliceMBBuffer (SSlice* pSlice, CMemoryAlign* pMa);
+
+int32_t InitSliceBsBuffer (SSlice* pSlice,
+                           SBitStringAux* pBsWrite,
+                           bool bIndependenceBsBuffer,
+                           const int32_t iMaxSliceBufferSize,
+                           CMemoryAlign* pMa);
+
+void FreeSliceBuffer(SSlice*& pSliceList,
+                     const int32_t kiMaxSliceNum,
+                     CMemoryAlign* pMa,
+                     const char* kpTag);
+
+//slice encoding process
 int32_t WelsCodePSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice);
 int32_t WelsCodePOverDynamicSlice (sWelsEncCtx* pEncCtx, SSlice* pSlice);
 
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -894,62 +894,15 @@
 
   return 0;
 }
+void FreeSliceInLayer (SDqLayer* pDq, CMemoryAlign* pMa) {
+  int32_t iIdx = 0;
 
-int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa) {
-  pMbCache->pCoeffLevel = (int16_t*)pMa->WelsMallocz (MB_COEFF_LIST_SIZE * sizeof (int16_t), "pMbCache->pCoeffLevel");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pCoeffLevel));
-  pMbCache->pMemPredMb = (uint8_t*)pMa->WelsMallocz (2 * 256 * sizeof (uint8_t), "pMbCache->pMemPredMb");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredMb));
-  pMbCache->pSkipMb = (uint8_t*)pMa->WelsMallocz (384 * sizeof (uint8_t), "pMbCache->pSkipMb");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pSkipMb));
-  pMbCache->pMemPredBlk4 = (uint8_t*)pMa->WelsMallocz (2 * 16 * sizeof (uint8_t), "pMbCache->pMemPredBlk4");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredBlk4));
-  pMbCache->pBufferInterPredMe = (uint8_t*)pMa->WelsMallocz (4 * 640 * sizeof (uint8_t), "pMbCache->pBufferInterPredMe");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pBufferInterPredMe));
-  pMbCache->pPrevIntra4x4PredModeFlag = (bool*)pMa->WelsMallocz (16 * sizeof (bool),
-                                        "pMbCache->pPrevIntra4x4PredModeFlag");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pPrevIntra4x4PredModeFlag));
-  pMbCache->pRemIntra4x4PredModeFlag = (int8_t*)pMa->WelsMallocz (16 * sizeof (int8_t),
-                                       "pMbCache->pRemIntra4x4PredModeFlag");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pRemIntra4x4PredModeFlag));
-  pMbCache->pDct = (SDCTCoeff*)pMa->WelsMallocz (sizeof (SDCTCoeff), "pMbCache->pDct");
-  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pDct));
-  return 0;
-}
-
-void FreeMbCache (SMbCache* pMbCache, CMemoryAlign* pMa) {
-  if (NULL != pMbCache->pCoeffLevel) {
-    pMa->WelsFree (pMbCache->pCoeffLevel, "pMbCache->pCoeffLevel");
-    pMbCache->pCoeffLevel = NULL;
+  FreeSliceBuffer (pDq->sLayerInfo.pSliceInLayer, pDq->iMaxSliceNum, pMa, "pSliceInLayer");
+  for (; iIdx < MAX_THREADS_NUM; iIdx ++) {
+    FreeSliceBuffer (pDq->sSliceThreadInfo.pSliceInThread[iIdx],
+                     pDq->sSliceThreadInfo.iMaxSliceNumInThread[iIdx],
+                     pMa, "pSliceInLayer");
   }
-  if (NULL != pMbCache->pMemPredMb) {
-    pMa->WelsFree (pMbCache->pMemPredMb, "pMbCache->pMemPredMb");
-    pMbCache->pMemPredMb = NULL;
-  }
-  if (NULL != pMbCache->pSkipMb) {
-    pMa->WelsFree (pMbCache->pSkipMb, "pMbCache->pSkipMb");
-    pMbCache->pSkipMb = NULL;
-  }
-  if (NULL != pMbCache->pMemPredBlk4) {
-    pMa->WelsFree (pMbCache->pMemPredBlk4, "pMbCache->pMemPredBlk4");
-    pMbCache->pMemPredBlk4 = NULL;
-  }
-  if (NULL != pMbCache->pBufferInterPredMe) {
-    pMa->WelsFree (pMbCache->pBufferInterPredMe, "pMbCache->pBufferInterPredMe");
-    pMbCache->pBufferInterPredMe = NULL;
-  }
-  if (NULL != pMbCache->pPrevIntra4x4PredModeFlag) {
-    pMa->WelsFree (pMbCache->pPrevIntra4x4PredModeFlag, "pMbCache->pPrevIntra4x4PredModeFlag");
-    pMbCache->pPrevIntra4x4PredModeFlag = NULL;
-  }
-  if (NULL != pMbCache->pRemIntra4x4PredModeFlag) {
-    pMa->WelsFree (pMbCache->pRemIntra4x4PredModeFlag, "pMbCache->pRemIntra4x4PredModeFlag");
-    pMbCache->pRemIntra4x4PredModeFlag = NULL;
-  }
-  if (NULL != pMbCache->pDct) {
-    pMa->WelsFree (pMbCache->pDct, "pMbCache->pDct");
-    pMbCache->pDct = NULL;
-  }
 }
 
 void FreeDqLayer (SDqLayer*& pDq, CMemoryAlign* pMa) {
@@ -957,23 +910,8 @@
     return;
   }
 
-  if (NULL != pDq->sLayerInfo.pSliceInLayer) {
-    int32_t iSliceIdx = 0;
-    while (iSliceIdx < pDq->iMaxSliceNum) {
-      SSlice* pSlice = &pDq->sLayerInfo.pSliceInLayer[iSliceIdx];
-      FreeMbCache (&pSlice->sMbCacheInfo, pMa);
+  FreeSliceInLayer (pDq, pMa);
 
-      //slice bs buffer
-      if (NULL != pSlice->sSliceBs.pBs) {
-        pMa->WelsFree (pSlice->sSliceBs.pBs, "sSliceBs.pBs");
-        pSlice->sSliceBs.pBs = NULL;
-      }
-      ++ iSliceIdx;
-    }
-    pMa->WelsFree (pDq->sLayerInfo.pSliceInLayer, "pSliceInLayer");
-    pDq->sLayerInfo.pSliceInLayer = NULL;
-  }
-
   if (pDq->pNumSliceCodedOfPartition) {
     pMa->WelsFree (pDq->pNumSliceCodedOfPartition, "pNumSliceCodedOfPartition");
     pDq->pNumSliceCodedOfPartition = NULL;
@@ -1165,47 +1103,138 @@
   return INVALID_ID;
 }
 
-static inline int32_t InitpSliceInLayer (sWelsEncCtx** ppCtx, SDqLayer* pDqLayer, CMemoryAlign* pMa,
-    const int32_t iMaxSliceNum, const int32_t kiDlayerIndex) {
-  int32_t iMaxSliceBufferSize  = (*ppCtx)->iSliceBufferSize[kiDlayerIndex];
-  int32_t iSliceIdx            = 0;
-  SliceModeEnum uiSliceMode    = (*ppCtx)->pSvcParam->sSpatialLayers[kiDlayerIndex].sSliceArgument.uiSliceMode;
+static inline int32_t InitSliceList (sWelsEncCtx** ppCtx,
+                                     SDqLayer* pDqLayer,
+                                     SSlice* pSliceList,
+                                     const int32_t kiMaxSliceNum,
+                                     const int32_t kiDlayerIndex,
+                                     CMemoryAlign* pMa) {
+  const int32_t kiMBWidth         = pDqLayer->iMbWidth;
+  const int32_t kiMBHeight        = pDqLayer->iMbHeight;
+  SSliceArgument* pSliceArgument  = & (*ppCtx)->pSvcParam->sSpatialLayers[kiDlayerIndex].sSliceArgument;
+  int32_t iMaxSliceBufferSize     = (*ppCtx)->iSliceBufferSize[kiDlayerIndex];
+  int32_t iSliceIdx               = 0;
+  int32_t iRet                    = 0;
 
   //SM_SINGLE_SLICE mode using single-thread bs writer pOut->sBsWrite
   //even though multi-thread is on for other layers
   bool bIndependenceBsBuffer   = ((*ppCtx)->pSvcParam->iMultipleThreadIdc > 1 &&
-                                  SM_SINGLE_SLICE != uiSliceMode) ? true : false;
+                                  SM_SINGLE_SLICE != pSliceArgument->uiSliceMode) ? true : false;
 
-  if (iMaxSliceBufferSize <= 0) {
+  if (iMaxSliceBufferSize <= 0 || kiMBWidth <= 0 || kiMBHeight <= 0) {
     return ENC_RETURN_UNEXPECTED;
   }
 
-  while (iSliceIdx < iMaxSliceNum) {
-    SSlice* pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx];
+  while (iSliceIdx < kiMaxSliceNum) {
+    SSlice* pSlice = pSliceList + iSliceIdx;
+    if (NULL == pSlice)
+      return ENC_RETURN_MEMALLOCERR;
 
     pSlice->uiSliceIdx       = iSliceIdx;
-    pSlice->sSliceBs.uiSize  = iMaxSliceBufferSize;
-    pSlice->sSliceBs.uiBsPos = 0;
-    if (bIndependenceBsBuffer) {
-      pSlice->pSliceBsa      = &pSlice->sSliceBs.sBsWrite;
-      pSlice->sSliceBs.pBs   = (uint8_t*)pMa->WelsMalloc (iMaxSliceBufferSize, "SliceBs");
-      if (NULL == pSlice->sSliceBs.pBs) {
-        return ENC_RETURN_MEMALLOCERR;
-      }
-    } else {
-      pSlice->pSliceBsa      = & (*ppCtx)->pOut->sBsWrite;
-      pSlice->sSliceBs.pBs   = NULL;
-    }
 
-    if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) {
-      FreeMemorySvc (ppCtx);
-      return ENC_RETURN_MEMALLOCERR;
-    }
+    iRet = InitSliceBsBuffer (pSlice,
+                              & (*ppCtx)->pOut->sBsWrite,
+                              bIndependenceBsBuffer,
+                              iMaxSliceBufferSize,
+                              pMa);
+    if (ENC_RETURN_SUCCESS != iRet)
+      return iRet;
+
+    iRet = InitSliceMBInfo (pSliceArgument, pSlice,
+                            kiMBWidth, kiMBHeight,
+                            pMa);
+
+    if (ENC_RETURN_SUCCESS != iRet)
+      return iRet;
+
+    iRet = AllocateSliceMBBuffer (pSlice, pMa);
+
+    if (ENC_RETURN_SUCCESS != iRet)
+      return iRet;
+
     ++ iSliceIdx;
   }
   return ENC_RETURN_SUCCESS;
 }
 
+static inline int32_t InitSliceThreadInfo (sWelsEncCtx** ppCtx,
+    SDqLayer* pDqLayer,
+    const int32_t kiDlayerIndex,
+    CMemoryAlign* pMa) {
+  // Give each of the iMultipleThreadIdc threads its own slice list and initialize every slice in it.
+  SSliceThreadInfo* pInfo   = &pDqLayer->sSliceThreadInfo;
+  int32_t iThreadNum        = (*ppCtx)->pSvcParam->iMultipleThreadIdc;
+  int32_t iListLen          = 0;
+  int32_t iThread           = 0;
+
+  assert (iThreadNum > 0);
+  // list length per thread: (slice count / thread count + 1) * 2, capped at the slice count
+  iListLen = ((*ppCtx)->iMaxSliceCount / iThreadNum + 1) * 2;
+  iListLen = WELS_MIN ((*ppCtx)->iMaxSliceCount, (int) iListLen);
+
+  for (iThread = 0; iThread < iThreadNum; ++iThread) {
+    SSlice* pList = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iListLen, "pSliceInThread");
+
+    pInfo->iMaxSliceNumInThread[iThread]     = iListLen;
+    pInfo->iEncodedSliceNumInThread[iThread] = 0;
+    pInfo->pSliceInThread[iThread]           = pList;
+    if (NULL == pList) {
+      return ENC_RETURN_MEMALLOCERR;
+    }
+
+    // stop at the first failing list; lists attached so far remain in pDqLayer
+    const int32_t kiRet = InitSliceList (ppCtx, pDqLayer, pList, iListLen, kiDlayerIndex, pMa);
+    if (ENC_RETURN_SUCCESS != kiRet) {
+      return kiRet;
+    }
+  }
+
+  // entries beyond iThreadNum get no list
+  while (iThread < MAX_THREADS_NUM) {
+    pInfo->iMaxSliceNumInThread[iThread]     = iListLen;
+    pInfo->iEncodedSliceNumInThread[iThread] = 0;
+    pInfo->pSliceInThread[iThread]           = NULL;
+    ++iThread;
+  }
+
+  return ENC_RETURN_SUCCESS;
+}
+
+// Allocates and initializes every slice buffer of pDqLayer: first the per-thread lists
+// (sSliceThreadInfo), then the layer-wide list pSliceInLayer holding iMaxSliceNum slices.
+// Returns ENC_RETURN_SUCCESS, or the error code of the first step that failed; buffers
+// attached before a failure stay in pDqLayer (InitDqLayers releases them via FreeDqLayer).
+static inline int32_t InitSliceInLayer (sWelsEncCtx** ppCtx,
+                                        SDqLayer* pDqLayer,
+                                        const int32_t kiDlayerIndex,
+                                        CMemoryAlign* pMa)  {
+  int32_t iRet                  = 0;
+  int32_t iMaxSliceNum          = pDqLayer->iMaxSliceNum;
+
+  // to do, will add later: allocate only one of the two buffer sets, based on thread mode
+  iRet = InitSliceThreadInfo (ppCtx,
+                              pDqLayer,
+                              kiDlayerIndex,
+                              pMa);
+  if (ENC_RETURN_SUCCESS != iRet)
+    return iRet;
+
+  pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer");
+  if(NULL == pDqLayer->sLayerInfo.pSliceInLayer)
+    return ENC_RETURN_MEMALLOCERR;
+
+  // the result must be stored in iRet, otherwise the check below can never fire
+  iRet = InitSliceList (ppCtx,
+                        pDqLayer,
+                        pDqLayer->sLayerInfo.pSliceInLayer,
+                        iMaxSliceNum,
+                        kiDlayerIndex,
+                        pMa);
+  if (ENC_RETURN_SUCCESS != iRet)
+    return iRet;
+
+  return ENC_RETURN_SUCCESS;
+}
 /*!
  * \brief   initialize ppDqLayerList and slicepEncCtx_list due to count number of layers available
  * \pParam  pCtx            sWelsEncCtx*
@@ -1309,12 +1338,12 @@
     if (iMaxSliceNum < kiSliceNum)
       iMaxSliceNum = kiSliceNum;
     pDqLayer->iMaxSliceNum = iMaxSliceNum;
-    {
-      pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer");
-      WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer->sLayerInfo.pSliceInLayer), FreeDqLayer (pDqLayer, pMa))
 
-      int32_t iReturn = InitpSliceInLayer (ppCtx, pDqLayer, pMa, iMaxSliceNum, iDlayerIndex);
-      WELS_VERIFY_RETURN_PROC_IF (1, (ENC_RETURN_SUCCESS != iReturn), FreeDqLayer (pDqLayer, pMa))
+    iResult = InitSliceInLayer (ppCtx, pDqLayer, iDlayerIndex, pMa);
+    if (iResult) {
+      WelsLog (& (*ppCtx)->sLogCtx, WELS_LOG_WARNING, "InitDqLayers(), InitSliceInLayer failed(%d)!", iResult);
+      FreeDqLayer (pDqLayer, pMa);
+      return iResult;
     }
 
     //deblocking parameters initialization
@@ -3769,8 +3798,7 @@
   pCtx->iEncoderError = ENC_RETURN_SUCCESS;
   pCtx->bCurFrameMarkedAsSceneLtr = false;
   pFbi->iLayerNum = 0; // for initialization
-  pFbi->uiTimeStamp = GetTimestampForRc (pSrcPic->uiTimeStamp, pCtx->uiLastTimestamp,
-                                         pCtx->pSvcParam->sSpatialLayers[pCtx->pSvcParam->iSpatialLayerNum - 1].fFrameRate);
+  pFbi->uiTimeStamp = GetTimestampForRc (pSrcPic->uiTimeStamp, pCtx->uiLastTimestamp, pCtx->pSvcParam->sSpatialLayers[pCtx->pSvcParam->iSpatialLayerNum - 1].fFrameRate);
   for (int32_t iNalIdx = 0; iNalIdx < MAX_LAYER_NUM_OF_FRAME; iNalIdx++) {
     pFbi->sLayerInfo[iNalIdx].eFrameType = videoFrameTypeSkip;
   }
--- a/codec/encoder/core/src/slice_multi_threading.cpp
+++ b/codec/encoder/core/src/slice_multi_threading.cpp
@@ -271,6 +271,7 @@
 
   assert (iThreadNum > 0);
   iMaxSliceNumInThread = ((*ppCtx)->iMaxSliceCount / iThreadNum + 1) * 2;
+  iMaxSliceNumInThread =  WELS_MIN ((*ppCtx)->iMaxSliceCount, (int) iMaxSliceNumInThread);
 
   pSmt = (SSliceThreading*)pMa->WelsMalloc (sizeof (SSliceThreading), "SSliceThreading");
   WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt), FreeMemorySvc (ppCtx))
@@ -307,8 +308,6 @@
     pSmt->pThreadPEncCtx[iIdx].pWelsPEncCtx   = (void*) *ppCtx;
     pSmt->pThreadPEncCtx[iIdx].iSliceIndex    = iIdx;
     pSmt->pThreadPEncCtx[iIdx].iThreadIndex   = iIdx;
-    pSmt->iMaxSliceNumInThread[iIdx]          = iMaxSliceNumInThread;
-    pSmt->iEncodedSliceNumInThread[iIdx]      = 0;
     pSmt->pThreadHandles[iIdx]                = 0;
 
     WelsSnprintf (name, SEM_NAME_MAX, "ee%d%s", iIdx, pSmt->eventNamespace);
@@ -332,19 +331,10 @@
     err = WelsEventOpen (&pSmt->pReadySliceCodingEvent[iIdx], name);
     MT_TRACE_LOG (pLogCtx, WELS_LOG_INFO, "[MT] Open pReadySliceCodingEvent%d = 0x%p named(%s) ret%d err%d", iIdx,
                   (void*)pSmt->pReadySliceCodingEvent[iIdx], name, err, errno);
-
-    pSmt->pSliceInThread[iIdx] = (SSlice*)pMa->WelsMalloc (sizeof (SSlice)*iMaxSliceNumInThread, "pSmt->pSliceInThread");
-    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->pSliceInThread[iIdx]), FreeMemorySvc (ppCtx))
-
-    pSmt->piSliceIndexInThread[iIdx] = (int32_t *)pMa->WelsMalloc (iMaxSliceNumInThread, "pSmt->piSliceIndexInThread");
-    WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pSmt->piSliceIndexInThread[iIdx]), FreeMemorySvc (ppCtx))
-
     ++ iIdx;
   }
   for (; iIdx < MAX_THREADS_NUM; iIdx++) {
     pSmt->pThreadBsBuffer[iIdx]      = NULL;
-    pSmt->pSliceInThread[iIdx]       = NULL;
-    pSmt->piSliceIndexInThread[iIdx] = NULL;
   }
 
   WelsSnprintf (name, SEM_NAME_MAX, "scm%s", pSmt->eventNamespace);
@@ -431,16 +421,6 @@
     if (pSmt->pThreadBsBuffer[i]) {
       pMa->WelsFree (pSmt->pThreadBsBuffer[i], "pSmt->pThreadBsBuffer");
       pSmt->pThreadBsBuffer[i] = NULL;
-    }
-
-    if (pSmt->pSliceInThread[i]) {
-      pMa->WelsFree (pSmt->pSliceInThread[i], "pSmt->pSliceInThread");
-      pSmt->pSliceInThread[i] = NULL;
-    }
-
-    if (pSmt->piSliceIndexInThread[i]) {
-      pMa->WelsFree (pSmt->piSliceIndexInThread[i], "pSmt->piSliceIndexInThread");
-      pSmt->piSliceIndexInThread[i] = NULL;
     }
   }
   memset (&pSmt->bThreadBsBufferUsage, 0, MAX_THREADS_NUM * sizeof (bool));
--- a/codec/encoder/core/src/svc_enc_slice_segment.cpp
+++ b/codec/encoder/core/src/svc_enc_slice_segment.cpp
@@ -68,10 +68,8 @@
  * \return  0 - successful; none 0 - failed
  */
 int32_t AssignMbMapMultipleSlices (SDqLayer* pCurDq,const SSliceArgument* kpSliceArgument) {
-  SSliceCtx* pSliceSeg             = &pCurDq->sSliceEncCtx;
-  SSlice* pSliceInLayer            = pCurDq->sLayerInfo.pSliceInLayer;
-  SSliceHeaderExt* pSliceHeaderExt = NULL;
-  int32_t iSliceIdx  = 0;
+  SSliceCtx* pSliceSeg   = &pCurDq->sSliceEncCtx;
+  int32_t iSliceIdx      = 0;
   if (NULL == pSliceSeg || SM_SINGLE_SLICE == pSliceSeg->uiSliceMode)
     return 1;
 
@@ -82,9 +80,6 @@
     iSliceIdx = 0;
     while (iSliceIdx < iSliceNum) {
       const int32_t kiFirstMb                       = iSliceIdx * kiMbWidth;
-      SSliceHeaderExt* pSliceHeaderExt              = &pSliceInLayer[iSliceIdx].sSliceHeaderExt;
-      pSliceInLayer[iSliceIdx].iCountMbNumInSlice   = kiMbWidth;
-      pSliceHeaderExt->sSliceHeader.iFirstMbInSlice = kiFirstMb;
       WelsSetMemMultiplebytes_c(pSliceSeg->pOverallMbMap + kiFirstMb, iSliceIdx,
                                 kiMbWidth, sizeof(uint16_t));
       ++ iSliceIdx;
@@ -102,9 +97,6 @@
     do {
       const int32_t kiCurRunLength                  = kpSlicesAssignList[iSliceIdx];
       int32_t iRunIdx                               = 0;
-      pSliceHeaderExt                               = &pSliceInLayer[iSliceIdx].sSliceHeaderExt;
-      pSliceHeaderExt->sSliceHeader.iFirstMbInSlice = iMbIdx;
-      pSliceInLayer[iSliceIdx].iCountMbNumInSlice   = kiCurRunLength;
 
       // due here need check validate mb_assign_map for input pData, can not use memset
       do {
@@ -116,16 +108,7 @@
       ++ iSliceIdx;
     } while (iSliceIdx < kiCountSliceNumInFrame && iMbIdx < kiCountNumMbInFrame);
   } else if (SM_SIZELIMITED_SLICE == pSliceSeg->uiSliceMode) {
-    const int32_t kiMaxSliceNum       = pSliceSeg->iMaxSliceNumConstraint;
-    const int32_t kiCountNumMbInFrame = pSliceSeg->iMbNumInFrame;
-
-    iSliceIdx = 0;
-    do {
-      pSliceHeaderExt                                 = &pSliceInLayer[iSliceIdx].sSliceHeaderExt;
-      pSliceHeaderExt->sSliceHeader.iFirstMbInSlice   = 0;
-      pSliceInLayer[iSliceIdx].iCountMbNumInSlice     = kiCountNumMbInFrame;
-      iSliceIdx++;
-    } while (iSliceIdx < kiMaxSliceNum);
+     // do nothing here: pSliceSeg->pOverallMbMap will be initialized later
   } else { // any else uiSliceMode?
     assert (0);
   }
--- a/codec/encoder/core/src/svc_encode_slice.cpp
+++ b/codec/encoder/core/src/svc_encode_slice.cpp
@@ -89,7 +89,7 @@
 void WelsSliceHeaderExtInit (sWelsEncCtx* pEncCtx, SDqLayer* pCurLayer, SSlice* pSlice) {
   SSliceHeaderExt* pCurSliceExt = &pSlice->sSliceHeaderExt;
   SSliceHeader* pCurSliceHeader  = &pCurSliceExt->sSliceHeader;
-  SSpatialLayerInternal *pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId];
+  SSpatialLayerInternal* pParamInternal = &pEncCtx->pSvcParam->sDependencyLayers[pEncCtx->uiDependencyId];
   pCurSliceHeader->eSliceType = pEncCtx->eSliceType;
 
   pCurSliceExt->bStoreRefBasePicFlag = false;
@@ -138,7 +138,7 @@
 }
 
 
-void UpdateMbNeighbor(SDqLayer* pCurDq, SMB* pMb, const int32_t kiMbWidth, uint16_t uiSliceIdc) {
+void UpdateMbNeighbor (SDqLayer* pCurDq, SMB* pMb, const int32_t kiMbWidth, uint16_t uiSliceIdc) {
   uint32_t uiNeighborAvailFlag        = 0;
   const int32_t kiMbXY                = pMb->iMbXY;
   const int32_t kiMbX                 = pMb->iMbX;
@@ -298,7 +298,7 @@
   if (P_SLICE == pSliceHeader->eSliceType) {
     BsWriteOneBit (pBs, pSliceHeader->bNumRefIdxActiveOverrideFlag);
     if (pSliceHeader->bNumRefIdxActiveOverrideFlag) {
-      BsWriteUE (pBs, WELS_CLIP3(pSliceHeader->uiNumRefIdxL0Active - 1, 0, MAX_REF_PIC_COUNT));
+      BsWriteUE (pBs, WELS_CLIP3 (pSliceHeader->uiNumRefIdxL0Active - 1, 0, MAX_REF_PIC_COUNT));
     }
   }
 
@@ -366,7 +366,7 @@
   if (P_SLICE == pSliceHeader->eSliceType) {
     BsWriteOneBit (pBs, pSliceHeader->bNumRefIdxActiveOverrideFlag);
     if (pSliceHeader->bNumRefIdxActiveOverrideFlag) {
-      BsWriteUE (pBs, WELS_CLIP3(pSliceHeader->uiNumRefIdxL0Active - 1, 0, MAX_REF_PIC_COUNT));
+      BsWriteUE (pBs, WELS_CLIP3 (pSliceHeader->uiNumRefIdxL0Active - 1, 0, MAX_REF_PIC_COUNT));
     }
   }
 
@@ -521,7 +521,7 @@
 //second. lower than highest Dependency Layer, and for every Dependency Layer with one quality layer(single layer)
 int32_t WelsISliceMdEnc (sWelsEncCtx* pEncCtx, SSlice* pSlice) { //pMd + encoding
   SDqLayer* pCurLayer           = pEncCtx->pCurDqLayer;
- SMbCache* pMbCache            = &pSlice->sMbCacheInfo;
+  SMbCache* pMbCache            = &pSlice->sMbCacheInfo;
   SSliceHeaderExt* pSliceHdExt  = &pSlice->sSliceHeaderExt;
   SMB* pMbList                  = pCurLayer->sMbDataP;
   SMB* pCurMb                   = NULL;
@@ -746,7 +746,164 @@
   WelsSliceHeaderExtWrite
 };
 
+// Allocate the eight work buffers of a slice's MB cache through pMa->WelsMallocz; returns 0 when all succeed.
+int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa) {
+  pMbCache->pCoeffLevel = (int16_t*)pMa->WelsMallocz (MB_COEFF_LIST_SIZE * sizeof (int16_t), "pMbCache->pCoeffLevel");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pCoeffLevel)); // presumably returns 1 from here when the condition holds (macro defined elsewhere) - TODO confirm
+  pMbCache->pMemPredMb = (uint8_t*)pMa->WelsMallocz (2 * 256 * sizeof (uint8_t), "pMbCache->pMemPredMb");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredMb));
+  pMbCache->pSkipMb = (uint8_t*)pMa->WelsMallocz (384 * sizeof (uint8_t), "pMbCache->pSkipMb");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pSkipMb));
+  pMbCache->pMemPredBlk4 = (uint8_t*)pMa->WelsMallocz (2 * 16 * sizeof (uint8_t), "pMbCache->pMemPredBlk4");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredBlk4));
+  pMbCache->pBufferInterPredMe = (uint8_t*)pMa->WelsMallocz (4 * 640 * sizeof (uint8_t), "pMbCache->pBufferInterPredMe");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pBufferInterPredMe));
+  pMbCache->pPrevIntra4x4PredModeFlag = (bool*)pMa->WelsMallocz (16 * sizeof (bool),
+                                        "pMbCache->pPrevIntra4x4PredModeFlag");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pPrevIntra4x4PredModeFlag));
+  pMbCache->pRemIntra4x4PredModeFlag = (int8_t*)pMa->WelsMallocz (16 * sizeof (int8_t),
+                                       "pMbCache->pRemIntra4x4PredModeFlag");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pRemIntra4x4PredModeFlag));
+  pMbCache->pDct = (SDCTCoeff*)pMa->WelsMallocz (sizeof (SDCTCoeff), "pMbCache->pDct");
+  WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pDct));
 
+  return 0; // every buffer was allocated
+}
+
+// Release every MB cache buffer that is still attached to pMbCache and reset its pointer to NULL.
+void FreeMbCache (SMbCache* pMbCache, CMemoryAlign* pMa) {
+  if (pMbCache->pCoeffLevel != NULL) {
+    pMa->WelsFree (pMbCache->pCoeffLevel, "pMbCache->pCoeffLevel");
+    pMbCache->pCoeffLevel = NULL;
+  }
+  if (pMbCache->pMemPredMb != NULL) {
+    pMa->WelsFree (pMbCache->pMemPredMb, "pMbCache->pMemPredMb");
+    pMbCache->pMemPredMb = NULL;
+  }
+  if (pMbCache->pSkipMb != NULL) {
+    pMa->WelsFree (pMbCache->pSkipMb, "pMbCache->pSkipMb");
+    pMbCache->pSkipMb = NULL;
+  }
+  if (pMbCache->pMemPredBlk4 != NULL) {
+    pMa->WelsFree (pMbCache->pMemPredBlk4, "pMbCache->pMemPredBlk4");
+    pMbCache->pMemPredBlk4 = NULL;
+  }
+  if (pMbCache->pBufferInterPredMe != NULL) {
+    pMa->WelsFree (pMbCache->pBufferInterPredMe, "pMbCache->pBufferInterPredMe");
+    pMbCache->pBufferInterPredMe = NULL;
+  }
+  if (pMbCache->pPrevIntra4x4PredModeFlag != NULL) {
+    pMa->WelsFree (pMbCache->pPrevIntra4x4PredModeFlag, "pMbCache->pPrevIntra4x4PredModeFlag");
+    pMbCache->pPrevIntra4x4PredModeFlag = NULL;
+  }
+  if (pMbCache->pRemIntra4x4PredModeFlag != NULL) {
+    pMa->WelsFree (pMbCache->pRemIntra4x4PredModeFlag, "pMbCache->pRemIntra4x4PredModeFlag");
+    pMbCache->pRemIntra4x4PredModeFlag = NULL;
+  }
+  if (pMbCache->pDct != NULL) {
+    pMa->WelsFree (pMbCache->pDct, "pMbCache->pDct");
+    pMbCache->pDct = NULL;
+  }
+}
+
+// Derive pSlice's first MB index and MB count from the slice mode and pSlice->uiSliceIdx,
+// and store them in the slice and its slice header.
+int32_t InitSliceMBInfo (SSliceArgument* pSliceArgument,
+                         SSlice* pSlice,
+                         const int32_t kiMBWidth,
+                         const int32_t kiMBHeight,
+                         CMemoryAlign* pMa) {
+  const int32_t* kpMbNumList = (int32_t*) & (pSliceArgument->uiSliceMbNum[0]);
+  const int32_t kiFrameMbNum = kiMBWidth * kiMBHeight;
+  const int32_t kiIdx        = pSlice->uiSliceIdx;
+  int32_t iFirstMb           = 0;
+  int32_t iMbCount           = 0;
+
+  const bool kbWholeFrame = (SM_SINGLE_SLICE == pSliceArgument->uiSliceMode) ||
+                            (SM_SIZELIMITED_SLICE == pSliceArgument->uiSliceMode);
+  const bool kbRaster     = (SM_RASTER_SLICE == pSliceArgument->uiSliceMode);
+  const bool kbFixedNum   = (SM_FIXEDSLCNUM_SLICE == pSliceArgument->uiSliceMode);
+
+  if (kbWholeFrame) {
+    // the slice initially spans the whole frame
+    iFirstMb = 0;
+    iMbCount = kiFrameMbNum;
+  } else if (kbRaster && (0 == pSliceArgument->uiSliceMbNum[0])) {
+    // the first MB count is 0: every slice is one MB row
+    iFirstMb = kiIdx * kiMBWidth;
+    iMbCount = kiMBWidth;
+  } else if (kbRaster || kbFixedNum) {
+    // the slice begins after the MBs assigned to all slices before it
+    for (int32_t iPrev = 0; iPrev < kiIdx; ++iPrev) {
+      iFirstMb += kpMbNumList[iPrev];
+    }
+
+    // the assigned MB counts must not run past the frame
+    if (iFirstMb >= kiFrameMbNum) {
+      return ENC_RETURN_UNEXPECTED;
+    }
+    iMbCount = kpMbNumList[kiIdx];
+  } else { // any else uiSliceMode?
+    assert (0);
+  }
+
+  pSlice->sSliceHeaderExt.sSliceHeader.iFirstMbInSlice = iFirstMb;
+  pSlice->iCountMbNumInSlice                           = iMbCount;
+
+  return ENC_RETURN_SUCCESS;
+}
+
+// Allocate the MB cache buffers of one slice.
+// Maps the result of AllocMbCacheAligned onto an ENC_RETURN_* code.
+int32_t AllocateSliceMBBuffer (SSlice* pSlice, CMemoryAlign* pMa) {
+  const int32_t kiFailed = AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa);
+
+  // any non-zero result is reported as an allocation failure
+  return kiFailed ? ENC_RETURN_MEMALLOCERR : ENC_RETURN_SUCCESS;
+}
+
+// Set up the bitstream output of pSlice: a private buffer of size iMaxSliceBufferSize when
+// bIndependenceBsBuffer is set, otherwise the shared writer pBsWrite and no private buffer.
+int32_t InitSliceBsBuffer (SSlice* pSlice,
+                           SBitStringAux* pBsWrite,
+                           bool bIndependenceBsBuffer,
+                           const int32_t iMaxSliceBufferSize,
+                           CMemoryAlign* pMa) {
+  pSlice->sSliceBs.uiBsPos = 0;
+  pSlice->sSliceBs.uiSize  = iMaxSliceBufferSize;
+
+  if (!bIndependenceBsBuffer) {
+    pSlice->sSliceBs.pBs = NULL;
+    pSlice->pSliceBsa    = pBsWrite;
+    return ENC_RETURN_SUCCESS;
+  }
+
+  // independent mode: the slice writes through its own sBsWrite into its own buffer
+  pSlice->sSliceBs.pBs = (uint8_t*)pMa->WelsMalloc (iMaxSliceBufferSize, "SliceBs");
+  pSlice->pSliceBsa    = &pSlice->sSliceBs.sBsWrite;
+  return (NULL != pSlice->sSliceBs.pBs) ? ENC_RETURN_SUCCESS : ENC_RETURN_MEMALLOCERR;
+}
+
+// Free a slice list: the MB cache and private bs buffer of each of its kiMaxSliceNum slices,
+// then the list itself (tagged kpTag); pSliceList is reset to NULL. A NULL list is ignored.
+void FreeSliceBuffer (SSlice*& pSliceList, const int32_t kiMaxSliceNum, CMemoryAlign* pMa, const char* kpTag) {
+  if (NULL == pSliceList) {
+    return;
+  }
+
+  for (int32_t iIdx = 0; iIdx < kiMaxSliceNum; ++iIdx) {
+    SSlice& rSlice = pSliceList[iIdx];
+    FreeMbCache (&rSlice.sMbCacheInfo, pMa);
+    if (rSlice.sSliceBs.pBs != NULL) { // slice bs buffer
+      pMa->WelsFree (rSlice.sSliceBs.pBs, "sSliceBs.pBs");
+      rSlice.sSliceBs.pBs = NULL;
+    }
+  }
+
+  pMa->WelsFree (pSliceList, kpTag);
+  pSliceList = NULL;
+}
+
 int32_t WelsCodeOneSlice (sWelsEncCtx* pEncCtx, const int32_t kiSliceIdx, const int32_t kiNalType) {
   SDqLayer* pCurLayer                   = pEncCtx->pCurDqLayer;
   SNalUnitHeaderExt* pNalHeadExt        = &pCurLayer->sLayerInfo.sNalHeaderExt;
@@ -753,8 +910,8 @@
   SSlice* pCurSlice                     = &pCurLayer->sLayerInfo.pSliceInLayer[kiSliceIdx];
   SBitStringAux* pBs                    = pCurSlice->pSliceBsa;
   const int32_t kiDynamicSliceFlag      = (pEncCtx->pSvcParam->sSpatialLayers[pEncCtx->uiDependencyId].sSliceArgument.uiSliceMode
-                                       ==
-                                       SM_SIZELIMITED_SLICE);
+     ==
+     SM_SIZELIMITED_SLICE);
 
   assert (kiSliceIdx == (int) pCurSlice->uiSliceIdx);
 
@@ -805,7 +962,7 @@
   int32_t iIdx                  = kiFirstMbIdxOfNextSlice;
   int32_t iNextSliceFirstMbIdxRowStart = ((kiFirstMbIdxOfNextSlice % kiMbWidth) ? 1 : 0);
   int32_t iCountMbUpdate        = kiMbWidth +
-                                    iNextSliceFirstMbIdxRowStart; //need to update MB(iMbXY+1) to MB(iMbXY+1+row) in common case
+                                  iNextSliceFirstMbIdxRowStart; //need to update MB(iMbXY+1) to MB(iMbXY+1+row) in common case
   const int32_t kiEndMbNeedUpdate       = kiFirstMbIdxOfNextSlice + iCountMbUpdate;
   SMB* pMb = &pMbList[iIdx];