ref: 7c6445418b0eb9b3d0419f1d596ac4fc52d02d42
dir: /codec/encoder/core/src/encoder_ext.cpp/
/*! * \copy * Copyright (c) 2009-2013, Cisco Systems * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. * * * \file encoder_ext.c * * \brief core encoder for SVC * * \date 7/24/2009 Created * ************************************************************************************* */ #include <string.h> #include <stdlib.h> #include <assert.h> #include "encoder.h" #include "extern.h" #include "encoder_context.h" #include "typedefs.h" #include "wels_const.h" #include "wels_common_basis.h" #include "codec_def.h" #include "param_svc.h" #include "cpu_core.h" #include "cpu.h" #include "utils.h" #include "svc_enc_frame.h" #include "svc_enc_golomb.h" #include "svc_enc_slice_segment.h" #include "au_set.h" #include "picture_handle.h" #include "codec_app_def.h" #include "svc_base_layer_md.h" #include "svc_encode_slice.h" #include "decode_mb_aux.h" #include "deblocking.h" #include "rc.h" #include "ref_list_mgr_svc.h" #include "md.h" #include "ls_defines.h" #include "set_mb_syn_cavlc.h" #include "crt_util_safe_x.h" // Safe CRT routines like utils for cross platforms #include "array_stack_align.h" // for MT, 4/22/2010 #include "slice_multi_threading.h" #if defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG) #include "measure_time.h" #endif//DYNAMIC_SLICE_ASSIGN namespace WelsSVCEnc { int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx, SLayerBSInfo* pLbi, int32_t* pNalIdxInLayer, int32_t* pLayerSize, int32_t iFirstMbInPartition, // first mb inclusive in partition int32_t iEndMbInPartition, // end mb exclusive in partition int32_t iStartSliceIdx ); /*! * \brief validate checking in parameter configuration * \pParam pParam SWelsSvcCodingParam* * \return successful - 0; otherwise none 0 for failed */ int32_t ParamValidation (SWelsSvcCodingParam* pCfg) { float fMaxFrameRate = 0.0f; const float fEpsn = 0.000001f; int32_t i = 0; int32_t iLastSpatialWidth = 0; int32_t iLastSpatialHeight = 0; float fLastFrameRateIn = 0.0f; float fLastFrameRateOut = 0.0f; SDLayerParam* pLastSpatialParam = NULL; assert (pCfg != NULL); for (i = 0; i < pCfg->iNumDependencyLayer; ++ i) { SDLayerParam* fDlp = &pCfg->sDependencyLayers[i]; if (fDlp->fOutputFrameRate > fDlp->fInputFrameRate || (fDlp->fInputFrameRate >= -fEpsn && fDlp->fInputFrameRate <= fEpsn) || (fDlp->fOutputFrameRate >= -fEpsn && fDlp->fOutputFrameRate <= fEpsn)) { #if defined (_DEBUG) fprintf (stderr, "Invalid settings in input frame rate(%.6f) or output frame rate(%.6f) of layer #%d config file..\n", fDlp->fInputFrameRate, fDlp->fOutputFrameRate, i); #endif return 1; } if (UINT_MAX == GetLogFactor (fDlp->fOutputFrameRate, fDlp->fInputFrameRate)) { #if defined (_DEBUG) fprintf (stderr, "Invalid settings in input frame rate(%.6f) and output frame rate(%.6f) of layer #%d config file: iResult of output frame rate divided by input frame rate should be power of 2(i.e,in/pOut=2^n)..\n", fDlp->fInputFrameRate, fDlp->fOutputFrameRate, i); #endif return 1; } } for (i = 0; i < pCfg->iNumDependencyLayer; ++ i) { SDLayerParam* fDlp = &pCfg->sDependencyLayers[i]; if (fDlp->fInputFrameRate > fMaxFrameRate) fMaxFrameRate = fDlp->fInputFrameRate; } if (fMaxFrameRate > fEpsn && (fMaxFrameRate - pCfg->fMaxFrameRate > fEpsn || fMaxFrameRate - pCfg->fMaxFrameRate < -fEpsn)) { pCfg->fMaxFrameRate = fMaxFrameRate; } for (i = 0; i < pCfg->iNumDependencyLayer; ++ i) { SDLayerParam* fDlp = &pCfg->sDependencyLayers[i]; pLastSpatialParam = fDlp; iLastSpatialWidth = fDlp->iFrameWidth; iLastSpatialHeight = fDlp->iFrameHeight; fLastFrameRateIn = fDlp->fInputFrameRate; fLastFrameRateOut = fDlp->fOutputFrameRate; } return 0; } int32_t ParamValidationExt (void* pParam) { SWelsSvcCodingParam* pCodingParam = (SWelsSvcCodingParam*)pParam; int8_t i = 0; int32_t iIdx = 0; assert (pCodingParam != NULL); if (NULL == pCodingParam) return 1; if (pCodingParam->iNumDependencyLayer < 1 || pCodingParam->iNumDependencyLayer > MAX_DEPENDENCY_LAYER) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), monitor invalid pCodingParam->iNumDependencyLayer: %d!\n", pCodingParam->iNumDependencyLayer); #endif//#if _DEBUG return 1; } if (pCodingParam->iNumTemporalLayer < 1 || pCodingParam->iNumTemporalLayer > MAX_TEMPORAL_LEVEL) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), monitor invalid pCodingParam->iNumTemporalLayer: %d!\n", pCodingParam->iNumTemporalLayer); #endif//#if _DEBUG return 1; } if (pCodingParam->uiGopSize < 1 || pCodingParam->uiGopSize > MAX_GOP_SIZE) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), monitor invalid pCodingParam->uiGopSize: %d!\n", pCodingParam->uiGopSize); #endif//#if _DEBUG return 1; } if (pCodingParam->uiIntraPeriod && pCodingParam->uiIntraPeriod < pCodingParam->uiGopSize) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), uiIntraPeriod(%d) should be not less than that of uiGopSize(%d) or -1 specified!\n", pCodingParam->uiIntraPeriod, pCodingParam->uiGopSize); #endif//#if _DEBUG return 1; } if (pCodingParam->uiIntraPeriod && (pCodingParam->uiIntraPeriod & (pCodingParam->uiGopSize - 1)) != 0) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), uiIntraPeriod(%d) should be multiple of uiGopSize(%d) or -1 specified!\n", pCodingParam->uiIntraPeriod, pCodingParam->uiGopSize); #endif//#if _DEBUG return 1; } #ifdef MT_ENABLED //about iMultipleThreadIdc, bDeblockingParallelFlag, iLoopFilterDisableIdc, & uiSliceMode // (1) Single Thread // if (THREAD==1)//single thread // no parallel_deblocking: bDeblockingParallelFlag = 0; // (2) Multi Thread: see uiSliceMode decision if (pCodingParam->iMultipleThreadIdc == 1) { //now is single thread. no parallel deblocking, set flag=0 pCodingParam->bDeblockingParallelFlag = false; } else { pCodingParam->bDeblockingParallelFlag = true; } #else pCodingParam->bDeblockingParallelFlag = false; #endif//MT_ENABLED for (i = 0; i < pCodingParam->iNumDependencyLayer; ++ i) { SDLayerParam* fDlp = &pCodingParam->sDependencyLayers[i]; const int32_t kiPicWidth = fDlp->iFrameWidth; const int32_t kiPicHeight = fDlp->iFrameHeight; int32_t iMbWidth = 0; int32_t iMbHeight = 0; int32_t iMbNumInFrame = 0; int32_t iMaxSliceNum = MAX_SLICES_NUM; if (kiPicWidth <= 0 || kiPicHeight <= 0) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid %d x %d in dependency layer settings!\n", kiPicWidth, kiPicHeight); #endif//#if _DEBUG return 1; } if ((kiPicWidth & 0x0F) != 0 || (kiPicHeight & 0x0F) != 0) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), in layer #%d iWidth x iHeight(%d x %d) both should be multiple of 16, can not support with arbitrary size currently!\n", i, kiPicWidth, kiPicHeight); #endif//#if _DEBUG return 1; } if (fDlp->sMso.uiSliceMode >= SM_RESERVED) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceMode (%d) settings!\n", fDlp->sMso.uiSliceMode); #endif//#if _DEBUG return 1; } //check pSlice settings under multi-pSlice if (kiPicWidth <= 16 && kiPicHeight <= 16) { //only have one MB, set to single_slice fDlp->sMso.uiSliceMode = SM_SINGLE_SLICE; } switch (fDlp->sMso.uiSliceMode) { case SM_SINGLE_SLICE: fDlp->sMso.sSliceArgument.iSliceNum = 1; fDlp->sMso.sSliceArgument.uiSliceSizeConstraint = 0; fDlp->sMso.sSliceArgument.iSliceNum = 0; for (iIdx = 0; iIdx < MAX_SLICES_NUM; iIdx++) { fDlp->sMso.sSliceArgument.uiSliceMbNum[iIdx] = 0; } break; case SM_FIXEDSLCNUM_SLICE: { fDlp->sMso.sSliceArgument.uiSliceSizeConstraint = 0; iMbWidth = (kiPicWidth + 15) >> 4; iMbHeight = (kiPicHeight + 15) >> 4; iMbNumInFrame = iMbWidth * iMbHeight; iMaxSliceNum = MAX_SLICES_NUM; if (fDlp->sMso.sSliceArgument.iSliceNum <= 0 || fDlp->sMso.sSliceArgument.iSliceNum > iMaxSliceNum) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceNum (%d) settings!\n", fDlp->sMso.sSliceArgument.iSliceNum); #endif//#if _DEBUG return 1; } if (fDlp->sMso.sSliceArgument.iSliceNum == 1) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), uiSliceNum(%d) you set for SM_FIXEDSLCNUM_SLICE, now turn to SM_SINGLE_SLICE type!\n", fDlp->sMso.sSliceArgument.iSliceNum); #endif//#if _DEBUG fDlp->sMso.uiSliceMode = SM_SINGLE_SLICE; break; } if (pCodingParam->bEnableRc) { // multiple slices verify with gom //check uiSliceNum GomValidCheckSliceNum (iMbWidth, iMbHeight, (int32_t*)&fDlp->sMso.sSliceArgument.iSliceNum); assert (fDlp->sMso.sSliceArgument.iSliceNum > 1); //set uiSliceMbNum with current uiSliceNum GomValidCheckSliceMbNum (iMbWidth, iMbHeight, &fDlp->sMso.sSliceArgument); } else if (!CheckFixedSliceNumMultiSliceSetting (iMbNumInFrame, &fDlp->sMso.sSliceArgument)) { // verify interleave mode settings //check uiSliceMbNum with current uiSliceNum #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!\n", fDlp->sMso.sSliceArgument.uiSliceMbNum[0]); #endif//#if _DEBUG return 1; } // considering the coding efficient and performance, iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting if (iMbNumInFrame <= MIN_NUM_MB_PER_SLICE) { fDlp->sMso.uiSliceMode = SM_SINGLE_SLICE; fDlp->sMso.sSliceArgument.iSliceNum = 1; break; } } break; case SM_RASTER_SLICE: { fDlp->sMso.sSliceArgument.uiSliceSizeConstraint = 0; iMbWidth = (kiPicWidth + 15) >> 4; iMbHeight = (kiPicHeight + 15) >> 4; iMbNumInFrame = iMbWidth * iMbHeight; iMaxSliceNum = MAX_SLICES_NUM; if (fDlp->sMso.sSliceArgument.uiSliceMbNum[0] <= 0) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!\n", fDlp->sMso.sSliceArgument.uiSliceMbNum[0]); #endif//#if _DEBUG return 1; } if (!CheckRasterMultiSliceSetting (iMbNumInFrame, &fDlp->sMso.sSliceArgument)) { // verify interleave mode settings #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!\n", fDlp->sMso.sSliceArgument.uiSliceMbNum[0]); #endif//#if _DEBUG return 1; } if (fDlp->sMso.sSliceArgument.iSliceNum <= 0 || fDlp->sMso.sSliceArgument.iSliceNum > iMaxSliceNum) { // verify interleave mode settings #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceNum (%d) in SM_RASTER_SLICE settings!\n", fDlp->sMso.sSliceArgument.iSliceNum); #endif//#if _DEBUG return 1; } if (fDlp->sMso.sSliceArgument.iSliceNum == 1) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), pSlice setting for SM_RASTER_SLICE now turn to SM_SINGLE_SLICE!\n"); #endif//#if _DEBUG fDlp->sMso.uiSliceMode = SM_SINGLE_SLICE; break; } #ifdef MT_ENABLED if (pCodingParam->bEnableRc && fDlp->sMso.sSliceArgument.iSliceNum > 1) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), WARNING: GOM based RC do not support SM_RASTER_SLICE!\n"); #endif//#if _DEBUG } #endif // considering the coding efficient and performance, iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting if (iMbNumInFrame <= MIN_NUM_MB_PER_SLICE) { fDlp->sMso.uiSliceMode = SM_SINGLE_SLICE; fDlp->sMso.sSliceArgument.iSliceNum = 1; break; } } break; case SM_ROWMB_SLICE: { fDlp->sMso.sSliceArgument.uiSliceSizeConstraint = 0; iMbWidth = (kiPicWidth + 15) >> 4; iMbHeight = (kiPicHeight + 15) >> 4; iMaxSliceNum = MAX_SLICES_NUM; if (iMbHeight > iMaxSliceNum) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceNum (%d) settings more than MAX!\n", iMbHeight); #endif//#if _DEBUG return 1; } fDlp->sMso.sSliceArgument.iSliceNum = iMbHeight; if (fDlp->sMso.sSliceArgument.iSliceNum <= 0) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceNum (%d) settings!\n", fDlp->sMso.sSliceArgument.iSliceNum); #endif//#if _DEBUG return 1; } if (!CheckRowMbMultiSliceSetting (iMbWidth, &fDlp->sMso.sSliceArgument)) { // verify interleave mode settings #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceMbNum (%d) settings!\n", fDlp->sMso.sSliceArgument.uiSliceMbNum[0]); #endif//#if _DEBUG return 1; } } break; case SM_DYN_SLICE: { iMbWidth = (kiPicWidth + 15) >> 4; iMbHeight = (kiPicHeight + 15) >> 4; if (fDlp->sMso.sSliceArgument.uiSliceSizeConstraint <= 0) { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid iSliceSize (%d) settings!\n", fDlp->sMso.sSliceArgument.uiSliceSizeConstraint); #endif//#if _DEBUG return 1; } // considering the coding efficient and performance, iCountMbNum constraint by MIN_NUM_MB_PER_SLICE condition of multi-pSlice mode settting if (iMbWidth * iMbHeight <= MIN_NUM_MB_PER_SLICE) { fDlp->sMso.uiSliceMode = SM_SINGLE_SLICE; fDlp->sMso.sSliceArgument.iSliceNum = 1; break; } } break; default: { #if defined (_DEBUG) fprintf (stderr, "ParamValidationExt(), invalid uiSliceMode (%d) settings!\n", pCodingParam->sDependencyLayers[0].sMso.uiSliceMode); #endif//#if _DEBUG return 1; } break; } } return ParamValidation (pCodingParam); } void WelsEncoderApplyFrameRate(SWelsSvcCodingParam* pParam) { SDLayerParam* pLayerParam; const float kfEpsn = 0.000001f; const int32_t kiNumLayer = pParam->iNumDependencyLayer; int32_t i; const float kfMaxFrameRate = pParam->fMaxFrameRate; float fRatio; float fTargetOutputFrameRate; //set input frame rate to each layer for (i=0;i<kiNumLayer;i++) { pLayerParam = &(pParam->sDependencyLayers[i]); fRatio = pLayerParam->fOutputFrameRate / pLayerParam->fInputFrameRate; if ( (kfMaxFrameRate - pLayerParam->fInputFrameRate) > kfEpsn || (kfMaxFrameRate - pLayerParam->fInputFrameRate) < -kfEpsn ) { pLayerParam->fInputFrameRate = kfMaxFrameRate; fTargetOutputFrameRate = kfMaxFrameRate*fRatio; pLayerParam->fOutputFrameRate = (fTargetOutputFrameRate>=6)?fTargetOutputFrameRate:(pLayerParam->fInputFrameRate); //TODO:{Sijia} from design, there is no sense to have temporal layer when under 6fps even with such setting? } } } void WelsEncoderApplyBitRate(SWelsSvcCodingParam* pParam) { //TODO (Sijia): this is a temporary solution which keep the ratio between layers //but it is also possible to fulfill the bitrate of lower layer first SDLayerParam* pLayerParam; const int32_t iNumLayers = pParam->iNumDependencyLayer; int32_t i, iOrigTotalBitrate=0; //read old BR for (i=0;i<iNumLayers;i++) { iOrigTotalBitrate += pParam->sDependencyLayers[i].iSpatialBitrate; } //write new BR float fRatio = 0.0; for (i=0;i<iNumLayers;i++) { pLayerParam = &(pParam->sDependencyLayers[i]); fRatio = pLayerParam->iSpatialBitrate/(static_cast<float>(iOrigTotalBitrate)); pLayerParam->iSpatialBitrate = static_cast<int32_t>(pParam->iTargetBitrate*fRatio); } } /*! * \brief acquire count number of layers and NALs based on configurable paramters dependency * \pParam pCtx sWelsEncCtx* * \pParam pParam SWelsSvcCodingParam* * \pParam pCountLayers pointer of count number of layers indeed * \pParam iCountNals pointer of count number of nals indeed * \return 0 - successful; otherwise failed */ static inline int32_t AcquireLayersNals (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam, int32_t* pCountLayers, int32_t* pCountNals) { int32_t iCountNumLayers = 0; int32_t iCountNumNals = 0; int32_t iNumDependencyLayers = 0; int32_t iDIndex = 0; #if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER) int32_t iNumLayersPack = 0; #endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER if (NULL == pParam || NULL == ppCtx || NULL == *ppCtx) return 1; iNumDependencyLayers = pParam->iNumDependencyLayer; do { SDLayerParam* pDLayer = &pParam->sDependencyLayers[iDIndex]; // pDLayer->ptr_cfg = pParam; int32_t iOrgNumNals = iCountNumNals; //Note: Sep. 2010 //Review this part and suggest no change, since the memory over-use //(1) counts little to the overall performance //(2) should not be critial even under mobile case if (SM_DYN_SLICE == pDLayer->sMso.uiSliceMode) { iCountNumNals += MAX_SLICES_NUM; // plus prefix NALs if (iDIndex == 0) iCountNumNals += MAX_SLICES_NUM; // MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME ensured at svc_enc_slice_segment.h #if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER) assert (MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME); // iNumLayersPack += MAX_SLICES_NUM; // do not count it for dynamic slicing mode #else//!MT_ENABLED || !PACKING_ONE_SLICE_PER_LAYER assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER); #endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER } else { /*if ( SM_SINGLE_SLICE != pDLayer->sMso.uiSliceMode )*/ const int32_t kiNumOfSlice = GetInitialSliceNum ((pDLayer->iFrameWidth + 0x0f) >> 4, (pDLayer->iFrameHeight + 0x0f) >> 4, &pDLayer->sMso); // NEED check iCountNals value in case multiple slices is used iCountNumNals += kiNumOfSlice; // for pSlice VCL NALs // plus prefix NALs if (iDIndex == 0) iCountNumNals += kiNumOfSlice; #if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER) assert (num_of_slice <= MAX_SLICES_NUM && MAX_SLICES_NUM < MAX_LAYER_NUM_OF_FRAME); iNumLayersPack += num_of_slice; #else//!MT_ENABLED || !PACKING_ONE_SLICE_PER_LAYER assert (iCountNumNals - iOrgNumNals <= MAX_NAL_UNITS_IN_LAYER); #endif//MT_ENALBED && PACKING_ONE_SLICE_PER_LAYER if (kiNumOfSlice > MAX_SLICES_NUM) { WelsLog (*ppCtx, WELS_LOG_ERROR, "AcquireLayersNals(), num_of_slice(%d) > MAX_SLICES_NUM(%d) per (iDid= %d, qid= %d) settings!\n", kiNumOfSlice, MAX_SLICES_NUM, iDIndex, 0); return 1; } } #if !defined(MT_ENABLED) || !defined(PACKING_ONE_SLICE_PER_LAYER) if (iCountNumNals - iOrgNumNals > MAX_NAL_UNITS_IN_LAYER) { WelsLog (*ppCtx, WELS_LOG_ERROR, "AcquireLayersNals(), num_of_nals(%d) > MAX_NAL_UNITS_IN_LAYER(%d) per (iDid= %d, qid= %d) settings!\n", (iCountNumNals - iOrgNumNals), MAX_NAL_UNITS_IN_LAYER, iDIndex, 0); return 1; } #endif//!MT_ENABLED) || !PACKING_ONE_SLICE_PER_LAYER iCountNumLayers ++; ++ iDIndex; } while (iDIndex < iNumDependencyLayers); iCountNumNals += 1 + iNumDependencyLayers + (iCountNumLayers << 1) + iCountNumLayers; // plus iCountNumLayers for reserved application #if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER) iNumLayersPack += 1 + iNumDependencyLayers + (iCountNumLayers << 1); #endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER // to check number of layers / nals / slices dependencies, 12/8/2010 #if !defined(MT_ENABLED) if (iCountNumLayers > MAX_LAYER_NUM_OF_FRAME) { WelsLog (*ppCtx, WELS_LOG_ERROR, "AcquireLayersNals(), iCountNumLayers(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!", iCountNumLayers, MAX_LAYER_NUM_OF_FRAME); return 1; } #else//MT_ENABLED #if defined(PACKING_ONE_SLICE_PER_LAYER) if (iNumLayersPack > MAX_LAYER_NUM_OF_FRAME) { WelsLog (*ppCtx, WELS_LOG_ERROR, "AcquireLayersNals(), num_layers_pack_overall(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!", iNumLayersPack, MAX_LAYER_NUM_OF_FRAME); return 1; } #else//!PACKING_ONE_SLICE_PER_LAYER if (iCountNumLayers > MAX_LAYER_NUM_OF_FRAME) { WelsLog (*ppCtx, WELS_LOG_ERROR, "AcquireLayersNals(), iCountNumLayers(%d) > MAX_LAYER_NUM_OF_FRAME(%d)!", iCountNumLayers, MAX_LAYER_NUM_OF_FRAME); return 1; } #endif//PACKING_ONE_SLICE_PER_LAYER #endif//!MT_ENABLED if (NULL != pCountLayers) *pCountLayers = iCountNumLayers; if (NULL != pCountNals) *pCountNals = iCountNumNals; return 0; } /*! * \brief alloc spatial layers pictures (I420 based source pictures) */ int32_t AllocSpatialPictures (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pParam) { CMemoryAlign* pMa = (*ppCtx)->pMemAlign; const int32_t kiDlayerCount = pParam->iNumDependencyLayer; int32_t iDlayerIndex = 0; // spatial pictures iDlayerIndex = 0; do { const int32_t kiPicWidth = pParam->sDependencyLayers[iDlayerIndex].iFrameWidth; const int32_t kiPicHeight = pParam->sDependencyLayers[iDlayerIndex].iFrameHeight; const uint8_t kuiLayerInTemporal = 2 + WELS_MAX (pParam->sDependencyLayers[iDlayerIndex].iHighestTemporalId, 1); const uint8_t kuiRefNumInTemporal = kuiLayerInTemporal + pParam->iLTRRefNum; uint8_t i = 0; do { SPicture* pPic = AllocPicture (pMa, kiPicWidth, kiPicHeight, false); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pPic), FreeMemorySvc (ppCtx); *ppCtx = NULL) (*ppCtx)->pSpatialPic[iDlayerIndex][i] = pPic; ++ i; } while (i < kuiRefNumInTemporal); (*ppCtx)->uiSpatialLayersInTemporal[iDlayerIndex] = kuiLayerInTemporal; (*ppCtx)->uiSpatialPicNum[iDlayerIndex] = kuiRefNumInTemporal; ++ iDlayerIndex; } while (iDlayerIndex < kiDlayerCount); return 0; } void FreeSpatialPictures (sWelsEncCtx* pCtx) { CMemoryAlign* pMa = pCtx->pMemAlign; int32_t j = 0; while (j < pCtx->pSvcParam->iNumDependencyLayer) { uint8_t i = 0; uint8_t uiRefNumInTemporal = pCtx->uiSpatialPicNum[j]; while (i < uiRefNumInTemporal) { if (NULL != pCtx->pSpatialPic[j][i]) { FreePicture (pMa, &pCtx->pSpatialPic[j][i]); } ++ i; } pCtx->uiSpatialLayersInTemporal[j] = 0; ++ j; } } static void InitMbInfo (sWelsEncCtx* pEnc, SMB* pList, SDqLayer* pLayer, const int32_t kiDlayerId, const int32_t kiMaxMbNum) { int32_t iMbWidth = pLayer->iMbWidth; int32_t iMbHeight = pLayer->iMbHeight; int32_t iIdx; int32_t iMbNum = iMbWidth * iMbHeight; SSliceCtx* pSliceCtx = pLayer->pSliceEncCtx; uint32_t uiNeighborAvail; const int32_t kiOffset = (kiDlayerId & 0x01) * kiMaxMbNum; SMVUnitXY (*pLayerMvUnitBlock4x4)[MB_BLOCK4x4_NUM] = (SMVUnitXY (*)[MB_BLOCK4x4_NUM]) ( &pEnc->pMvUnitBlock4x4[MB_BLOCK4x4_NUM * kiOffset]); int8_t (*pLayerRefIndexBlock8x8)[MB_BLOCK8x8_NUM] = (int8_t (*)[MB_BLOCK8x8_NUM]) ( &pEnc->pRefIndexBlock4x4[MB_BLOCK8x8_NUM * kiOffset]); for (iIdx = 0; iIdx < iMbNum; iIdx++) { BOOL_T bLeft; BOOL_T bTop; BOOL_T bLeftTop; BOOL_T bRightTop; int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY; uint8_t uiSliceIdc; pList[iIdx].iMbX = pEnc->pStrideTab->pMbIndexX[kiDlayerId][iIdx]; pList[iIdx].iMbY = pEnc->pStrideTab->pMbIndexY[kiDlayerId][iIdx]; pList[iIdx].iMbXY = iIdx; uiSliceIdc = WelsMbToSliceIdc (pSliceCtx, iIdx); iLeftXY = iIdx - 1; iTopXY = iIdx - iMbWidth; iLeftTopXY = iTopXY - 1; iRightTopXY = iTopXY + 1; bLeft = (pList[iIdx].iMbX > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iLeftXY)); bTop = (pList[iIdx].iMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iTopXY)); bLeftTop = (pList[iIdx].iMbX > 0) && (pList[iIdx].iMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iLeftTopXY)); bRightTop = (pList[iIdx].iMbX < (iMbWidth - 1)) && (pList[iIdx].iMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iRightTopXY)); uiNeighborAvail = 0; if (bLeft) { uiNeighborAvail |= LEFT_MB_POS; } if (bTop) { uiNeighborAvail |= TOP_MB_POS; } if (bLeftTop) { uiNeighborAvail |= TOPLEFT_MB_POS; } if (bRightTop) { uiNeighborAvail |= TOPRIGHT_MB_POS; } pList[iIdx].uiSliceIdc = uiSliceIdc; // merge from svc_hd_opt_b for multiple slices coding pList[iIdx].uiNeighborAvail = uiNeighborAvail; uiNeighborAvail = 0; if (pList[iIdx].iMbX >= BASE_MV_MB_NMB) uiNeighborAvail |= LEFT_MB_POS; if (pList[iIdx].iMbX <= (iMbWidth - 1 - BASE_MV_MB_NMB)) uiNeighborAvail |= RIGHT_MB_POS; if (pList[iIdx].iMbY >= BASE_MV_MB_NMB) uiNeighborAvail |= TOP_MB_POS; if (pList[iIdx].iMbY <= (iMbHeight - 1 - BASE_MV_MB_NMB)) uiNeighborAvail |= BOTTOM_MB_POS; pList[iIdx].sMv = pLayerMvUnitBlock4x4[iIdx]; pList[iIdx].pRefIndex = pLayerRefIndexBlock8x8[iIdx]; pList[iIdx].pSadCost = &pEnc->pSadCostMb[iIdx]; pList[iIdx].pIntra4x4PredMode = &pEnc->pIntra4x4PredModeBlocks[iIdx * INTRA_4x4_MODE_NUM]; pList[iIdx].pNonZeroCount = &pEnc->pNonZeroCountBlocks[iIdx * MB_LUMA_CHROMA_BLOCK4x4_NUM]; } } int32_t InitMbListD (sWelsEncCtx** ppCtx) { int32_t iNumDlayer = (*ppCtx)->pSvcParam->iNumDependencyLayer; int32_t iMbSize[MAX_DEPENDENCY_LAYER] = { 0 }; int32_t iOverallMbNum = 0; int32_t iMbWidth = 0; int32_t iMbHeight = 0; int32_t i; if (iNumDlayer > MAX_DEPENDENCY_LAYER) return 1; for (i = 0; i < iNumDlayer; i++) { iMbWidth = ((*ppCtx)->pSvcParam->sDependencyLayers[i].iFrameWidth + 15) >> 4; iMbHeight = ((*ppCtx)->pSvcParam->sDependencyLayers[i].iFrameHeight + 15) >> 4; iMbSize[i] = iMbWidth * iMbHeight; iOverallMbNum += iMbSize[i]; } (*ppCtx)->ppMbListD = static_cast<SMB**> ((*ppCtx)->pMemAlign->WelsMalloc (iNumDlayer * sizeof (SMB*), "ppMbListD")); (*ppCtx)->ppMbListD[0] = NULL; WELS_VERIFY_RETURN_PROC_IF (1, (*ppCtx)->ppMbListD == NULL, FreeMemorySvc (ppCtx)); (*ppCtx)->ppMbListD[0] = static_cast<SMB*> ((*ppCtx)->pMemAlign->WelsMallocz (iOverallMbNum * sizeof (SMB), "ppMbListD[0]")); WELS_VERIFY_RETURN_PROC_IF (1, (*ppCtx)->ppMbListD[0] == NULL, FreeMemorySvc (ppCtx)); (*ppCtx)->ppDqLayerList[0]->sMbDataP = (*ppCtx)->ppMbListD[0]; InitMbInfo (*ppCtx, (*ppCtx)->ppMbListD[0], (*ppCtx)->ppDqLayerList[0], 0, iMbSize[iNumDlayer - 1]); for (i = 1; i < iNumDlayer; i++) { (*ppCtx)->ppMbListD[i] = (*ppCtx)->ppMbListD[i - 1] + iMbSize[i - 1]; (*ppCtx)->ppDqLayerList[i]->sMbDataP = (*ppCtx)->ppMbListD[i]; InitMbInfo (*ppCtx, (*ppCtx)->ppMbListD[i], (*ppCtx)->ppDqLayerList[i], i, iMbSize[iNumDlayer - 1]); } return 0; } int32_t AllocMbCacheAligned (SMbCache* pMbCache, CMemoryAlign* pMa) { pMbCache->pCoeffLevel = (int16_t*)pMa->WelsMalloc (MB_COEFF_LIST_SIZE * sizeof (int16_t), "pMbCache->pCoeffLevel"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pCoeffLevel)); pMbCache->pMemPredMb = (uint8_t*)pMa->WelsMalloc (2 * 256 * sizeof (uint8_t), "pMbCache->pMemPredMb"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredMb)); pMbCache->pSkipMb = (uint8_t*)pMa->WelsMalloc (384 * sizeof (uint8_t), "pMbCache->pSkipMb"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pSkipMb)); pMbCache->pMemPredBlk4 = (uint8_t*)pMa->WelsMalloc (2 * 16 * sizeof (uint8_t), "pMbCache->pMemPredBlk4"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pMemPredBlk4)); pMbCache->pBufferInterPredMe = (uint8_t*)pMa->WelsMalloc (4 * 640 * sizeof (uint8_t), "pMbCache->pBufferInterPredMe"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pBufferInterPredMe)); pMbCache->pPrevIntra4x4PredModeFlag = (bool_t*)pMa->WelsMalloc (16 * sizeof (bool_t), "pMbCache->pPrevIntra4x4PredModeFlag"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pPrevIntra4x4PredModeFlag)); pMbCache->pRemIntra4x4PredModeFlag = (int8_t*)pMa->WelsMalloc (16 * sizeof (int8_t), "pMbCache->pRemIntra4x4PredModeFlag"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pRemIntra4x4PredModeFlag)); pMbCache->pDct = (SDCTCoeff*)pMa->WelsMalloc (sizeof (SDCTCoeff), "pMbCache->pDct"); WELS_VERIFY_RETURN_IF (1, (NULL == pMbCache->pDct)); return 0; } void FreeMbCache (SMbCache* pMbCache, CMemoryAlign* pMa) { if (NULL != pMbCache->pCoeffLevel) { pMa->WelsFree (pMbCache->pCoeffLevel, "pMbCache->pCoeffLevel"); pMbCache->pCoeffLevel = NULL; } if (NULL != pMbCache->pMemPredMb) { pMa->WelsFree (pMbCache->pMemPredMb, "pMbCache->pMemPredMb"); pMbCache->pMemPredMb = NULL; } if (NULL != pMbCache->pSkipMb) { pMa->WelsFree (pMbCache->pSkipMb, "pMbCache->pSkipMb"); pMbCache->pSkipMb = NULL; } if (NULL != pMbCache->pMemPredBlk4) { pMa->WelsFree (pMbCache->pMemPredBlk4, "pMbCache->pMemPredBlk4"); pMbCache->pMemPredBlk4 = NULL; } if (NULL != pMbCache->pBufferInterPredMe) { pMa->WelsFree (pMbCache->pBufferInterPredMe, "pMbCache->pBufferInterPredMe"); pMbCache->pBufferInterPredMe = NULL; } if (NULL != pMbCache->pPrevIntra4x4PredModeFlag) { pMa->WelsFree (pMbCache->pPrevIntra4x4PredModeFlag, "pMbCache->pPrevIntra4x4PredModeFlag"); pMbCache->pPrevIntra4x4PredModeFlag = NULL; } if (NULL != pMbCache->pRemIntra4x4PredModeFlag) { pMa->WelsFree (pMbCache->pRemIntra4x4PredModeFlag, "pMbCache->pRemIntra4x4PredModeFlag"); pMbCache->pRemIntra4x4PredModeFlag = NULL; } if (NULL != pMbCache->pDct) { pMa->WelsFree (pMbCache->pDct, "pMbCache->pDct"); pMbCache->pDct = NULL; } } /*! * \brief initialize ppDqLayerList and slicepEncCtx_list due to count number of layers available * \pParam pCtx sWelsEncCtx* * \return 0 - successful; otherwise failed */ static inline int32_t InitDqLayers (sWelsEncCtx** ppCtx) { SWelsSvcCodingParam* pParam = NULL; SWelsSPS* pSps = NULL; SSubsetSps* pSubsetSps = NULL; SWelsPPS* pPps = NULL; CMemoryAlign* pMa = NULL; SStrideTables* pStrideTab = NULL; int32_t iDlayerCount = 0; int32_t iDlayerIndex = 0; uint32_t iSpsId = 0; uint32_t iPpsId = 0; uint32_t iNumRef = 0; int32_t iResult = 0; if (NULL == ppCtx || NULL == *ppCtx) return 1; pMa = (*ppCtx)->pMemAlign; pParam = (*ppCtx)->pSvcParam; iDlayerCount = pParam->iNumDependencyLayer; iNumRef = pParam->iNumRefFrame; // highest_layers_in_temporal = 1 + WELS_MAX(pParam->iDecompStages, 1); pStrideTab = (*ppCtx)->pStrideTab; iDlayerIndex = 0; while (iDlayerIndex < iDlayerCount) { SRefList* pRefList = NULL; uint32_t i = 0; const int32_t kiWidth = pParam->sDependencyLayers[iDlayerIndex].iFrameWidth; const int32_t kiHeight = pParam->sDependencyLayers[iDlayerIndex].iFrameHeight; int32_t iPicWidth = WELS_ALIGN (kiWidth, MB_WIDTH_LUMA) + (PADDING_LENGTH << 1); // with iWidth of horizon int32_t iPicChromaWidth = iPicWidth >> 1; iPicWidth = WELS_ALIGN (iPicWidth, 32); // 32(or 16 for chroma below) to match original imp. here instead of iCacheLineSize iPicChromaWidth = WELS_ALIGN (iPicChromaWidth, 16); WelsGetEncBlockStrideOffset ((*ppCtx)->pStrideTab->pStrideEncBlockOffset[iDlayerIndex], iPicWidth, iPicChromaWidth); // pRef list pRefList = (SRefList*)pMa->WelsMallocz (sizeof (SRefList), "pRefList"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList), FreeMemorySvc (ppCtx)) do { pRefList->pRef[i] = AllocPicture (pMa, kiWidth, kiHeight, true); // to use actual size of current layer WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pRefList->pRef[i]), FreeMemorySvc (ppCtx)) ++ i; } while (i < 1 + iNumRef); pRefList->pNextBuffer = pRefList->pRef[0]; (*ppCtx)->ppRefPicListExt[iDlayerIndex] = pRefList; ++ iDlayerIndex; } // for I420 based source spatial pictures if (AllocSpatialPictures (ppCtx, pParam)) { FreeMemorySvc (ppCtx); return 1; } iDlayerIndex = 0; while (iDlayerIndex < iDlayerCount) { SDqLayer* pDqLayer = NULL; SDLayerParam* pDlayer = &pParam->sDependencyLayers[iDlayerIndex]; const int32_t kiMbW = (pDlayer->iFrameWidth + 0x0f) >> 4; const int32_t kiMbH = (pDlayer->iFrameHeight + 0x0f) >> 4; int32_t iMaxSliceNum = 1; const int32_t kiSliceNum = GetInitialSliceNum (kiMbW, kiMbH, &pDlayer->sMso); if (iMaxSliceNum < kiSliceNum) iMaxSliceNum = kiSliceNum; // pDq layers list pDqLayer = (SDqLayer*)pMa->WelsMallocz (sizeof (SDqLayer), "pDqLayer"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer), FreeMemorySvc (ppCtx)) // for dynamic slicing mode if (SM_DYN_SLICE == pDlayer->sMso.uiSliceMode) { const int32_t iSize = pParam->iCountThreadsNum * sizeof (int32_t); pDqLayer->pNumSliceCodedOfPartition = (int32_t*)pMa->WelsMallocz (iSize, "pNumSliceCodedOfPartition"); pDqLayer->pLastCodedMbIdxOfPartition = (int32_t*)pMa->WelsMallocz (iSize, "pLastCodedMbIdxOfPartition"); pDqLayer->pLastMbIdxOfPartition = (int32_t*)pMa->WelsMallocz (iSize, "pLastMbIdxOfPartition"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer->pNumSliceCodedOfPartition || NULL == pDqLayer->pLastCodedMbIdxOfPartition || NULL == pDqLayer->pLastMbIdxOfPartition), FreeMemorySvc (ppCtx)) } pDqLayer->iMbWidth = kiMbW; pDqLayer->iMbHeight = kiMbH; #ifndef MT_ENABLED if (SM_DYN_SLICE == pDlayer->sMso.uiSliceMode) { //wmalloc pSliceInLayer SSlice* pSlice = NULL; int32_t iSliceIdx = 0; //wmalloc AVERSLICENUM_CONSTANT of pDqLayer->sLayerInfo.pSliceInLayer, //wmalloc AVERSLICENUM_CONSTANT num of pSlice as initialization //only set value for the first pSlice pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer->sLayerInfo.pSliceInLayer), FreeMemorySvc (ppCtx)) { pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[0]; pSlice->uiSliceIdx = 0; pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite; } while (iSliceIdx < iMaxSliceNum) { pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx]; if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) { FreeMemorySvc (ppCtx); return 1; } ++ iSliceIdx; } } else #endif//!MT_ENABLED { int32_t iSliceIdx = 0; pDqLayer->sLayerInfo.pSliceInLayer = (SSlice*)pMa->WelsMallocz (sizeof (SSlice) * iMaxSliceNum, "pSliceInLayer"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pDqLayer->sLayerInfo.pSliceInLayer), FreeMemorySvc (ppCtx)) if (iMaxSliceNum > 1) { while (iSliceIdx < iMaxSliceNum) { SSlice* pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[iSliceIdx]; pSlice->uiSliceIdx = iSliceIdx; #ifdef MT_ENABLED if (pParam->iMultipleThreadIdc > 1) pSlice->pSliceBsa = & (*ppCtx)->pSliceBs[iSliceIdx].sBsWrite; else pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite; #else pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite; #endif//MT_ENABLED if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) { FreeMemorySvc (ppCtx); return 1; } ++ iSliceIdx; } } // fix issue in case single pSlice coding might be inclusive exist in variant spatial layer setting, also introducing multi-pSlice modes else { // only one pSlice SSlice* pSlice = &pDqLayer->sLayerInfo.pSliceInLayer[0]; pSlice->uiSliceIdx = 0; pSlice->pSliceBsa = & (*ppCtx)->pOut->sBsWrite; if (AllocMbCacheAligned (&pSlice->sMbCacheInfo, pMa)) { FreeMemorySvc (ppCtx); return 1; } } } //deblocking parameters initialization //target-layer deblocking pDqLayer->iLoopFilterDisableIdc = pParam->iLoopFilterDisableIdc; pDqLayer->iLoopFilterAlphaC0Offset = (pParam->iLoopFilterAlphaC0Offset) << 1; pDqLayer->iLoopFilterBetaOffset = (pParam->iLoopFilterBetaOffset) << 1; //inter-layer deblocking pDqLayer->uiDisableInterLayerDeblockingFilterIdc = pParam->iInterLayerLoopFilterDisableIdc; pDqLayer->iInterLayerSliceAlphaC0Offset = (pParam->iInterLayerLoopFilterAlphaC0Offset) << 1; pDqLayer->iInterLayerSliceBetaOffset = (pParam->iInterLayerLoopFilterBetaOffset) << 1; //parallel deblocking pDqLayer->bDeblockingParallelFlag = pParam->bDeblockingParallelFlag; //deblocking parameter adjustment if (SM_SINGLE_SLICE == pDlayer->sMso.uiSliceMode) { //iLoopFilterDisableIdc: will be 0 or 1 under single_slice if (2 == pParam->iLoopFilterDisableIdc) { pDqLayer->iLoopFilterDisableIdc = 0; } //bDeblockingParallelFlag pDqLayer->bDeblockingParallelFlag = false; } else { //multi-pSlice #ifdef MT_ENABLED if (0 == pDqLayer->iLoopFilterDisableIdc) { pDqLayer->bDeblockingParallelFlag = false; } #endif } (*ppCtx)->ppDqLayerList[iDlayerIndex] = pDqLayer; ++ iDlayerIndex; } // for dynamically malloc for parameter sets memory instead of maximal items for standard to reduce size, 3/18/2010 if (& (*ppCtx)->pSvcParam->bMgsT0OnlyStrategy) { (*ppCtx)->pPPSArray = (SWelsPPS*)pMa->WelsMalloc ((1 + iDlayerCount) * sizeof (SWelsPPS), "pPPSArray"); } else { (*ppCtx)->pPPSArray = (SWelsPPS*)pMa->WelsMalloc (iDlayerCount * sizeof (SWelsPPS), "pPPSArray"); } WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pPPSArray), FreeMemorySvc (ppCtx)) (*ppCtx)->pSpsArray = (SWelsSPS*)pMa->WelsMalloc (sizeof (SWelsSPS), "pSpsArray"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSpsArray), FreeMemorySvc (ppCtx)) if (iDlayerCount > 1) { (*ppCtx)->pSubsetArray = (SSubsetSps*)pMa->WelsMalloc ((iDlayerCount - 1) * sizeof (SSubsetSps), "pSubsetArray"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSubsetArray), FreeMemorySvc (ppCtx)) } (*ppCtx)->pDqIdcMap = (SDqIdc*)pMa->WelsMallocz (iDlayerCount * sizeof (SDqIdc), "pDqIdcMap"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pDqIdcMap), FreeMemorySvc (ppCtx)) iDlayerIndex = 0; while (iDlayerIndex < iDlayerCount) { SDqIdc* pDqIdc = & (*ppCtx)->pDqIdcMap[iDlayerIndex]; const bool_t bUseSubsetSps = (iDlayerIndex > BASE_DEPENDENCY_ID); SDLayerParam* pDlayerParam = &pParam->sDependencyLayers[iDlayerIndex]; pDqIdc->uiSpatialId = iDlayerIndex; pPps = & (*ppCtx)->pPPSArray[iPpsId]; if (!bUseSubsetSps) { pSps = & (*ppCtx)->pSpsArray[iSpsId]; } else { pSubsetSps = & (*ppCtx)->pSubsetArray[iSpsId]; pSps = &pSubsetSps->pSps; } // Need port pSps/pPps initialization due to spatial scalability changed if (!bUseSubsetSps) { WelsInitSps (pSps, pDlayerParam, pParam->uiIntraPeriod, pParam->iNumRefFrame, iSpsId, pParam->bEnableFrameCroppingFlag, pParam->bEnableRc); if (iDlayerCount > 1) { pSps->bConstraintSet0Flag = true; pSps->bConstraintSet1Flag = true; pSps->bConstraintSet2Flag = true; } } else { WelsInitSubsetSps (pSubsetSps, pDlayerParam, pParam->uiIntraPeriod, pParam->iNumRefFrame, iSpsId, pParam->bEnableFrameCroppingFlag, pParam->bEnableRc); } // initialize pPps WelsInitPps (pPps, pSps, pSubsetSps, iPpsId, true, bUseSubsetSps); // Not using FMO in SVC coding so far, come back if need FMO { iResult = InitSlicePEncCtx (& (*ppCtx)->pSliceCtxList[iDlayerIndex], (*ppCtx)->pMemAlign, false, pSps->iMbWidth, pSps->iMbHeight, & (pDlayerParam->sMso), pPps); if (iResult) { WelsLog (*ppCtx, WELS_LOG_WARNING, "InitDqLayers(), InitSlicePEncCtx failed(%d)!", iResult); FreeMemorySvc (ppCtx); return 1; } (*ppCtx)->ppDqLayerList[iDlayerIndex]->pSliceEncCtx = & (*ppCtx)->pSliceCtxList[iDlayerIndex]; } pDqIdc->iSpsId = iSpsId; pDqIdc->iPpsId = iPpsId; (*ppCtx)->sPSOVector.bPpsIdMappingIntoSubsetsps[iPpsId] = bUseSubsetSps; if (bUseSubsetSps) ++ iSpsId; ++ iPpsId; ++ (*ppCtx)->iSpsNum; ++ (*ppCtx)->iPpsNum; ++ iDlayerIndex; } return 0; } int32_t AllocStrideTables (sWelsEncCtx** ppCtx, const int32_t kiNumSpatialLayers) { CMemoryAlign* pMa = (*ppCtx)->pMemAlign; SWelsSvcCodingParam* pParam = (*ppCtx)->pSvcParam; SStrideTables* pPtr = NULL; int16_t* pTmpRow = NULL, *pRowX = NULL, *pRowY = NULL, *p = NULL; uint8_t* pBase = NULL; uint8_t* pBaseDec = NULL, *pBaseEnc = NULL, *pBaseMbX = NULL, *pBaseMbY = NULL; struct { int32_t iMbWidth; int32_t iCountMbNum; // count number of SMB in each spatial int32_t iSizeAllMbAlignCache; // cache line size aligned in each spatial } sMbSizeMap[MAX_DEPENDENCY_LAYER] = {0}; int32_t iLineSizeY[MAX_DEPENDENCY_LAYER][2] = {0}; int32_t iLineSizeUV[MAX_DEPENDENCY_LAYER][2] = {0}; int32_t iMapSpatialIdx[MAX_DEPENDENCY_LAYER][2] = {0}; int32_t iSizeDec = 0; int32_t iSizeEnc = 0; int32_t iCountLayersNeedCs[2] = {0}; const int32_t kiUnit1Size = 24 * sizeof (int32_t); int32_t iUnit2Size = 0; int32_t iNeedAllocSize = 0; int32_t iRowSize = 0; int16_t iMaxMbWidth = 0; int16_t iMaxMbHeight = 0; int32_t i = 0; int32_t iSpatialIdx = 0; int32_t iTemporalIdx = 0; int32_t iCntTid = 0; if (kiNumSpatialLayers <= 0 || kiNumSpatialLayers > MAX_DEPENDENCY_LAYER) return 1; pPtr = (SStrideTables*)pMa->WelsMalloc (sizeof (SStrideTables), "SStrideTables"); if (NULL == pPtr) return 1; (*ppCtx)->pStrideTab = pPtr; iCntTid = pParam->iNumTemporalLayer > 1 ? 2 : 1; iSpatialIdx = 0; while (iSpatialIdx < kiNumSpatialLayers) { const int32_t kiTmpWidth = (pParam->sDependencyLayers[iSpatialIdx].iFrameWidth + 15) >> 4; const int32_t kiTmpHeight = (pParam->sDependencyLayers[iSpatialIdx].iFrameHeight + 15) >> 4; int32_t iNumMb = kiTmpWidth * kiTmpHeight; sMbSizeMap[iSpatialIdx].iMbWidth = kiTmpWidth; sMbSizeMap[iSpatialIdx].iCountMbNum = iNumMb; iNumMb *= sizeof (int16_t); sMbSizeMap[iSpatialIdx].iSizeAllMbAlignCache = iNumMb; iUnit2Size += iNumMb; ++ iSpatialIdx; } // Adaptive size_cs, size_fdec by implementation dependency iTemporalIdx = 0; while (iTemporalIdx < iCntTid) { const bool_t kbBaseTemporalFlag = (iTemporalIdx == 0); iSpatialIdx = 0; while (iSpatialIdx < kiNumSpatialLayers) { SDLayerParam* fDlp = &pParam->sDependencyLayers[iSpatialIdx]; const int32_t kiWidthPad = WELS_ALIGN (fDlp->iFrameWidth, 16) + (PADDING_LENGTH << 1); iLineSizeY[iSpatialIdx][kbBaseTemporalFlag] = WELS_ALIGN (kiWidthPad, 32); iLineSizeUV[iSpatialIdx][kbBaseTemporalFlag] = WELS_ALIGN ((kiWidthPad >> 1), 16); iMapSpatialIdx[iCountLayersNeedCs[kbBaseTemporalFlag]][kbBaseTemporalFlag] = iSpatialIdx; ++ iCountLayersNeedCs[kbBaseTemporalFlag]; ++ iSpatialIdx; } ++ iTemporalIdx; } iSizeDec = kiUnit1Size * (iCountLayersNeedCs[0] + iCountLayersNeedCs[1]); iSizeEnc = kiUnit1Size * kiNumSpatialLayers; iNeedAllocSize = iSizeDec + iSizeEnc + (iUnit2Size << 1); pBase = (uint8_t*)pMa->WelsMalloc (iNeedAllocSize, "pBase"); if (NULL == pBase) { return 1; } pBaseDec = pBase; // iCountLayersNeedCs pBaseEnc = pBaseDec + iSizeDec; // iNumSpatialLayers pBaseMbX = pBaseEnc + iSizeEnc; // iNumSpatialLayers pBaseMbY = pBaseMbX + iUnit2Size; // iNumSpatialLayers iTemporalIdx = 0; while (iTemporalIdx < iCntTid) { const bool_t kbBaseTemporalFlag = (iTemporalIdx == 0); iSpatialIdx = 0; while (iSpatialIdx < iCountLayersNeedCs[kbBaseTemporalFlag]) { const int32_t kiActualSpatialIdx = iMapSpatialIdx[iSpatialIdx][kbBaseTemporalFlag]; const int32_t kiLumaWidth = iLineSizeY[kiActualSpatialIdx][kbBaseTemporalFlag]; const int32_t kiChromaWidth = iLineSizeUV[kiActualSpatialIdx][kbBaseTemporalFlag]; WelsGetEncBlockStrideOffset ((int32_t*)pBaseDec, kiLumaWidth, kiChromaWidth); pPtr->pStrideDecBlockOffset[kiActualSpatialIdx][kbBaseTemporalFlag] = (int32_t*)pBaseDec; pBaseDec += kiUnit1Size; ++ iSpatialIdx; } ++ iTemporalIdx; } iTemporalIdx = 0; while (iTemporalIdx < iCntTid) { const bool_t kbBaseTemporalFlag = (iTemporalIdx == 0); iSpatialIdx = 0; while (iSpatialIdx < kiNumSpatialLayers) { int32_t iMatchIndex = 0; bool_t bInMap = false; bool_t bMatchFlag = false; i = 0; while (i < iCountLayersNeedCs[kbBaseTemporalFlag]) { const int32_t kiActualIdx = iMapSpatialIdx[i][kbBaseTemporalFlag]; if (kiActualIdx == iSpatialIdx) { bInMap = true; break; } if (!bMatchFlag) { iMatchIndex = kiActualIdx; bMatchFlag = true; } ++ i; } if (bInMap) { ++ iSpatialIdx; continue; } // not in spatial map and assign match one to it pPtr->pStrideDecBlockOffset[iSpatialIdx][kbBaseTemporalFlag] = pPtr->pStrideDecBlockOffset[iMatchIndex][kbBaseTemporalFlag]; ++ iSpatialIdx; } ++ iTemporalIdx; } iSpatialIdx = 0; while (iSpatialIdx < kiNumSpatialLayers) { const int32_t kiAllocMbSize = sMbSizeMap[iSpatialIdx].iSizeAllMbAlignCache; pPtr->pStrideEncBlockOffset[iSpatialIdx] = (int32_t*)pBaseEnc; pPtr->pMbIndexX[iSpatialIdx] = (int16_t*)pBaseMbX; pPtr->pMbIndexY[iSpatialIdx] = (int16_t*)pBaseMbY; pBaseEnc += kiUnit1Size; pBaseMbX += kiAllocMbSize; pBaseMbY += kiAllocMbSize; ++ iSpatialIdx; } while (iSpatialIdx < MAX_DEPENDENCY_LAYER) { pPtr->pStrideDecBlockOffset[iSpatialIdx][0] = NULL; pPtr->pStrideDecBlockOffset[iSpatialIdx][1] = NULL; pPtr->pStrideEncBlockOffset[iSpatialIdx] = NULL; pPtr->pMbIndexX[iSpatialIdx] = NULL; pPtr->pMbIndexY[iSpatialIdx] = NULL; ++ iSpatialIdx; } // initialize pMbIndexX and pMbIndexY tables as below iMaxMbWidth = sMbSizeMap[kiNumSpatialLayers - 1].iMbWidth; iMaxMbWidth = WELS_ALIGN (iMaxMbWidth, 4); // 4 loops for int16_t required introduced as below iRowSize = iMaxMbWidth * sizeof (int16_t); pTmpRow = (int16_t*)pMa->WelsMalloc (iRowSize, "pTmpRow"); if (NULL == pTmpRow) { return 1; } pRowX = pTmpRow; pRowY = pRowX; // initialize pRowX & pRowY i = 0; p = pRowX; while (i < iMaxMbWidth) { *p = i; * (p + 1) = 1 + i; * (p + 2) = 2 + i; * (p + 3) = 3 + i; p += 4; i += 4; } iSpatialIdx = kiNumSpatialLayers; while (--iSpatialIdx >= 0) { int16_t* pMbIndexX = pPtr->pMbIndexX[iSpatialIdx]; const int32_t kiMbWidth = sMbSizeMap[iSpatialIdx].iMbWidth; const int32_t kiMbHeight = sMbSizeMap[iSpatialIdx].iCountMbNum / kiMbWidth; const int32_t kiLineSize = kiMbWidth * sizeof (int16_t); i = 0; while (i < kiMbHeight) { memcpy (pMbIndexX, pRowX, kiLineSize); // confirmed_safe_unsafe_usage pMbIndexX += kiMbWidth; ++ i; } } memset (pRowY, 0, iRowSize); iMaxMbHeight = sMbSizeMap[kiNumSpatialLayers - 1].iCountMbNum / sMbSizeMap[kiNumSpatialLayers - 1].iMbWidth; i = 0; for (;;) { ENFORCE_STACK_ALIGN_1D (int16_t, t, 4, 16) int32_t t32 = 0; int16_t j = 0; for (iSpatialIdx = kiNumSpatialLayers - 1; iSpatialIdx >= 0; -- iSpatialIdx) { const int32_t kiMbWidth = sMbSizeMap[iSpatialIdx].iMbWidth; const int32_t kiMbHeight = sMbSizeMap[iSpatialIdx].iCountMbNum / kiMbWidth; const int32_t kiLineSize = kiMbWidth * sizeof (int16_t); int16_t* pMbIndexY = pPtr->pMbIndexY[iSpatialIdx] + i * kiMbWidth; if (i < kiMbHeight) { memcpy (pMbIndexY, pRowY, kiLineSize); // confirmed_safe_unsafe_usage } } ++ i; if (i >= iMaxMbHeight) break; t32 = i | (i << 16); ST32 (t , t32); ST32 (t + 2, t32); p = pRowY; while (j < iMaxMbWidth) { ST64 (p, LD64 (t)); p += 4; j += 4; } } pMa->WelsFree (pTmpRow, "pTmpRow"); pTmpRow = NULL; return 0; } /*! * \brief request specific memory for SVC * \pParam pEncCtx sWelsEncCtx* * \return successful - 0; otherwise none 0 for failed */ int32_t RequestMemorySvc (sWelsEncCtx** ppCtx) { SWelsSvcCodingParam* pParam = (*ppCtx)->pSvcParam; CMemoryAlign* pMa = (*ppCtx)->pMemAlign; SDLayerParam* pFinalSpatial = NULL; int32_t iCountBsLen = 0; int32_t iCountNals = 0; int32_t iMaxPicWidth = 0; int32_t iMaxPicHeight = 0; int32_t iCountMaxMbNum = 0; int32_t iIndex = 0; int32_t iCountLayers = 0; int32_t iResult = 0; float fCompressRatioThr = .5f; const int32_t kiNumDependencyLayers = pParam->iNumDependencyLayer; const uint32_t kuiMvdInterTableSize = (kiNumDependencyLayers == 1 ? (1 + (648 << 1)) : (1 + (972 << 1))); const uint32_t kuiMvdCacheAlginedSize = kuiMvdInterTableSize * sizeof (uint16_t); int32_t iVclLayersBsSizeCount = 0; int32_t iNonVclLayersBsSizeCount = 0; #if defined(MT_ENABLED) int32_t iTargetSpatialBsSize = 0; #endif//MT_ENABLED if (kiNumDependencyLayers < 1 || kiNumDependencyLayers > MAX_DEPENDENCY_LAYER) { WelsLog (*ppCtx, WELS_LOG_WARNING, "RequestMemorySvc() failed due to invalid iNumDependencyLayers(%d)!\n", kiNumDependencyLayers); FreeMemorySvc (ppCtx); return 1; } if (pParam->uiGopSize == 0 || (pParam->uiIntraPeriod && ((pParam->uiIntraPeriod % pParam->uiGopSize) != 0))) { WelsLog (*ppCtx, WELS_LOG_WARNING, "RequestMemorySvc() failed due to invalid uiIntraPeriod(%d) (=multipler of uiGopSize(%d)!", pParam->uiIntraPeriod, pParam->uiGopSize); FreeMemorySvc (ppCtx); return 1; } pFinalSpatial = &pParam->sDependencyLayers[kiNumDependencyLayers - 1]; iMaxPicWidth = pFinalSpatial->iFrameWidth; iMaxPicHeight = pFinalSpatial->iFrameHeight; iCountMaxMbNum = ((15 + iMaxPicWidth) >> 4) * ((15 + iMaxPicHeight) >> 4); iResult = AcquireLayersNals (ppCtx, pParam, &iCountLayers, &iCountNals); if (iResult) { WelsLog (*ppCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AcquireLayersNals failed(%d)!", iResult); FreeMemorySvc (ppCtx); return 1; } iNonVclLayersBsSizeCount = SSEI_BUFFER_SIZE + pParam->iNumDependencyLayer * SPS_BUFFER_SIZE + (1 + pParam->iNumDependencyLayer) * PPS_BUFFER_SIZE; int32_t iLayerBsSize = 0; iIndex = 0; while (iIndex < pParam->iNumDependencyLayer) { SDLayerParam* fDlp = &pParam->sDependencyLayers[iIndex]; fCompressRatioThr = COMPRESS_RATIO_DECIDED_BY_RESOLUTION (fDlp->iFrameWidth, fDlp->iFrameHeight); iLayerBsSize = WELS_ROUND (((3 * fDlp->iFrameWidth * fDlp->iFrameHeight) >> 1) * fCompressRatioThr); iLayerBsSize = WELS_ALIGN (iLayerBsSize, 4); // 4 bytes alinged iVclLayersBsSizeCount += iLayerBsSize; ++ iIndex; } #if defined(MT_ENABLED) iTargetSpatialBsSize = iLayerBsSize; #endif//MT_ENABLED iCountBsLen = iNonVclLayersBsSizeCount + iVclLayersBsSizeCount; pParam->iNumRefFrame = WELS_CLIP3 (pParam->iNumRefFrame, MIN_REF_PIC_COUNT, MAX_REFERENCE_PICTURE_COUNT_NUM); // Output (*ppCtx)->pOut = (SWelsEncoderOutput*)pMa->WelsMalloc (sizeof (SWelsEncoderOutput), "SWelsEncoderOutput"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pOut), FreeMemorySvc (ppCtx)) (*ppCtx)->pOut->pBsBuffer = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pOut->pBsBuffer"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pOut->pBsBuffer), FreeMemorySvc (ppCtx)) (*ppCtx)->pOut->uiSize = iCountBsLen; (*ppCtx)->pOut->sNalList = (SWelsNalRaw*)pMa->WelsMalloc (iCountNals * sizeof (SWelsNalRaw), "pOut->sNalList"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pOut->sNalList), FreeMemorySvc (ppCtx)) (*ppCtx)->pOut->iCountNals = iCountNals; (*ppCtx)->pOut->iNalIndex = 0; #ifdef MT_ENABLED if (pParam->iMultipleThreadIdc > 1) { (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iCountBsLen + (iTargetSpatialBsSize * ((*ppCtx)->iMaxSliceCount - 1)), "pFrameBs"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx)) (*ppCtx)->iFrameBsSize = iCountBsLen * (*ppCtx)->iMaxSliceCount; } else #endif//MT_ENABLED { (*ppCtx)->pFrameBs = (uint8_t*)pMa->WelsMalloc (iCountBsLen, "pFrameBs"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pFrameBs), FreeMemorySvc (ppCtx)) (*ppCtx)->iFrameBsSize = iCountBsLen; } (*ppCtx)->iPosBsBuffer = 0; #ifdef MT_ENABLED // for pSlice bs buffers if (pParam->iMultipleThreadIdc > 1 && RequestMtResource (ppCtx, pParam, iCountBsLen, iTargetSpatialBsSize)) { WelsLog (*ppCtx, WELS_LOG_WARNING, "RequestMemorySvc(), RequestMtResource failed!"); FreeMemorySvc (ppCtx); return 1; } #endif (*ppCtx)->pIntra4x4PredModeBlocks = static_cast<int8_t*> (pMa->WelsMallocz (iCountMaxMbNum * INTRA_4x4_MODE_NUM, "pIntra4x4PredModeBlocks")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pIntra4x4PredModeBlocks), FreeMemorySvc (ppCtx)) (*ppCtx)->pNonZeroCountBlocks = static_cast<int8_t*> (pMa->WelsMallocz (iCountMaxMbNum * MB_LUMA_CHROMA_BLOCK4x4_NUM, "pNonZeroCountBlocks")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pNonZeroCountBlocks), FreeMemorySvc (ppCtx)) (*ppCtx)->pMvUnitBlock4x4 = static_cast<SMVUnitXY*> (pMa->WelsMallocz (iCountMaxMbNum * 2 * MB_BLOCK4x4_NUM * sizeof (SMVUnitXY), "pMvUnitBlock4x4")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pMvUnitBlock4x4), FreeMemorySvc (ppCtx)) (*ppCtx)->pRefIndexBlock4x4 = static_cast<int8_t*> (pMa->WelsMallocz (iCountMaxMbNum * 2 * MB_BLOCK8x8_NUM * sizeof (int8_t), "pRefIndexBlock4x4")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pRefIndexBlock4x4), FreeMemorySvc (ppCtx)) (*ppCtx)->pSadCostMb = static_cast<int32_t*> (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pSadCostMb")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSadCostMb), FreeMemorySvc (ppCtx)) (*ppCtx)->bEncCurFrmAsIdrFlag = true; // make sure first frame is IDR (*ppCtx)->iGlobalQp = 26; // global qp in default (*ppCtx)->pLtr = (SLTRState*)pMa->WelsMalloc (kiNumDependencyLayers * sizeof (SLTRState), "SLTRState"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pLtr), FreeMemorySvc (ppCtx)) int32_t i = 0; for (i = 0; i < kiNumDependencyLayers; i++) { ResetLtrState (& (*ppCtx)->pLtr[i]); } (*ppCtx)->ppRefPicListExt = (SRefList**)pMa->WelsMalloc (kiNumDependencyLayers * sizeof (SRefList*), "ppRefPicListExt"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->ppRefPicListExt), FreeMemorySvc (ppCtx)) // pSlice context list (*ppCtx)->pSliceCtxList = (SSliceCtx*)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SSliceCtx), "pSliceCtxList"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pSliceCtxList), FreeMemorySvc (ppCtx)) (*ppCtx)->ppDqLayerList = (SDqLayer**)pMa->WelsMalloc (kiNumDependencyLayers * sizeof (SDqLayer*), "ppDqLayerList"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->ppDqLayerList), FreeMemorySvc (ppCtx)) // stride tables if (AllocStrideTables (ppCtx, kiNumDependencyLayers)) { WelsLog (*ppCtx, WELS_LOG_WARNING, "RequestMemorySvc(), AllocStrideTables failed!"); FreeMemorySvc (ppCtx); return 1; } //Rate control module memory allocation // only malloc once for RC pData, 12/14/2009 (*ppCtx)->pWelsSvcRc = (SWelsSvcRc*)pMa->WelsMallocz (kiNumDependencyLayers * sizeof (SWelsSvcRc), "pWelsSvcRc"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pWelsSvcRc), FreeMemorySvc (ppCtx)) //End of Rate control module memory allocation //pVaa memory allocation (*ppCtx)->pVaa = (SVAAFrameInfo*)pMa->WelsMallocz (sizeof (SVAAFrameInfo), "pVaa"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa), FreeMemorySvc (ppCtx)) if ((*ppCtx)->pSvcParam->bEnableAdaptiveQuant) { //malloc mem (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureUnit = static_cast<SMotionTextureUnit*> (pMa->WelsMallocz (iCountMaxMbNum * sizeof (SMotionTextureUnit), "pVaa->sAdaptiveQuantParam.pMotionTextureUnit")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureUnit), FreeMemorySvc (ppCtx)) (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp = static_cast<int8_t*> (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int8_t), "pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp), FreeMemorySvc (ppCtx)) } (*ppCtx)->pVaa->pVaaBackgroundMbFlag = (int8_t*)pMa->WelsMallocz (iCountMaxMbNum * sizeof (int8_t), "pVaa->vaa_skip_mb_flag"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->pVaaBackgroundMbFlag), FreeMemorySvc (ppCtx)) (*ppCtx)->pVaa->sVaaCalcInfo.pSad8x8 = static_cast<int32_t (*)[4]> (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (int32_t), "pVaa->sVaaCalcInfo.sad8x8")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSad8x8), FreeMemorySvc (ppCtx)) (*ppCtx)->pVaa->sVaaCalcInfo.pSsd16x16 = static_cast<int32_t*> (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSsd16x16")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSsd16x16), FreeMemorySvc (ppCtx)) (*ppCtx)->pVaa->sVaaCalcInfo.pSum16x16 = static_cast<int32_t*> (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSum16x16")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSum16x16), FreeMemorySvc (ppCtx)) (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfSquare16x16 = static_cast<int32_t*> (pMa->WelsMallocz (iCountMaxMbNum * sizeof (int32_t), "pVaa->sVaaCalcInfo.pSumOfSquare16x16")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfSquare16x16), FreeMemorySvc (ppCtx)) if ((*ppCtx)->pSvcParam->bEnableBackgroundDetection) { //BGD control (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfDiff8x8 = static_cast<int32_t (*)[4]> (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (int32_t), "pVaa->sVaaCalcInfo.sd_16x16")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pSumOfDiff8x8), FreeMemorySvc (ppCtx)) (*ppCtx)->pVaa->sVaaCalcInfo.pMad8x8 = static_cast<uint8_t (*)[4]> (pMa->WelsMallocz (iCountMaxMbNum * 4 * sizeof (uint8_t), "pVaa->sVaaCalcInfo.mad_16x16")); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pVaa->sVaaCalcInfo.pMad8x8), FreeMemorySvc (ppCtx)) } //End of pVaa memory allocation iResult = InitDqLayers (ppCtx); if (iResult) { WelsLog (*ppCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitDqLayers failed(%d)!", iResult); FreeMemorySvc (ppCtx); return iResult; } if (InitMbListD (ppCtx)) { WelsLog (*ppCtx, WELS_LOG_WARNING, "RequestMemorySvc(), InitMbListD failed!"); FreeMemorySvc (ppCtx); return 1; } (*ppCtx)->pMvdCostTableInter = (uint16_t*)pMa->WelsMallocz (52 * kuiMvdCacheAlginedSize, "pMvdCostTableInter"); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == (*ppCtx)->pMvdCostTableInter), FreeMemorySvc (ppCtx)) MvdCostInit ((*ppCtx)->pMvdCostTableInter, kuiMvdInterTableSize); //should put to a better place? if ((*ppCtx)->ppRefPicListExt[0] != NULL && (*ppCtx)->ppRefPicListExt[0]->pRef[0] != NULL) (*ppCtx)->pDecPic = (*ppCtx)->ppRefPicListExt[0]->pRef[0]; else (*ppCtx)->pDecPic = NULL; // error here (*ppCtx)->pSps = & (*ppCtx)->pSpsArray[0]; (*ppCtx)->pPps = & (*ppCtx)->pPPSArray[0]; return 0; } /*! * \brief free memory in SVC core encoder * \pParam pEncCtx sWelsEncCtx* * \return none */ void FreeMemorySvc (sWelsEncCtx** ppCtx) { if (NULL != *ppCtx) { sWelsEncCtx* pCtx = *ppCtx; CMemoryAlign* pMa = pCtx->pMemAlign; SWelsSvcCodingParam* pParam = pCtx->pSvcParam; int32_t ilayer = 0; // SStrideTables if (NULL != pCtx->pStrideTab) { if (NULL != pCtx->pStrideTab->pStrideDecBlockOffset[0][1]) { pMa->WelsFree (pCtx->pStrideTab->pStrideDecBlockOffset[0][1], "pBase"); pCtx->pStrideTab->pStrideDecBlockOffset[0][1] = NULL; } pMa->WelsFree (pCtx->pStrideTab, "SStrideTables"); pCtx->pStrideTab = NULL; } // pDq idc map if (NULL != pCtx->pDqIdcMap) { pMa->WelsFree (pCtx->pDqIdcMap, "pDqIdcMap"); pCtx->pDqIdcMap = NULL; } if (NULL != pCtx->pOut) { // bs pBuffer if (NULL != pCtx->pOut->pBsBuffer) { pMa->WelsFree (pCtx->pOut->pBsBuffer, "pOut->pBsBuffer"); pCtx->pOut->pBsBuffer = NULL; } // NALs list if (NULL != pCtx->pOut->sNalList) { pMa->WelsFree (pCtx->pOut->sNalList, "pOut->sNalList"); pCtx->pOut->sNalList = NULL; } pMa->WelsFree (pCtx->pOut, "SWelsEncoderOutput"); pCtx->pOut = NULL; } #ifdef MT_ENABLED if (pParam != NULL && pParam->iMultipleThreadIdc > 1) ReleaseMtResource (ppCtx); #endif//MT_ENABLED // frame bitstream pBuffer if (NULL != pCtx->pFrameBs) { pMa->WelsFree (pCtx->pFrameBs, "pFrameBs"); pCtx->pFrameBs = NULL; } // pSpsArray if (NULL != pCtx->pSpsArray) { pMa->WelsFree (pCtx->pSpsArray, "pSpsArray"); pCtx->pSpsArray = NULL; } // pPPSArray if (NULL != pCtx->pPPSArray) { pMa->WelsFree (pCtx->pPPSArray, "pPPSArray"); pCtx->pPPSArray = NULL; } // subset_sps_array if (NULL != pCtx->pSubsetArray) { pMa->WelsFree (pCtx->pSubsetArray, "pSubsetArray"); pCtx->pSubsetArray = NULL; } if (NULL != pCtx->pIntra4x4PredModeBlocks) { pMa->WelsFree (pCtx->pIntra4x4PredModeBlocks, "pIntra4x4PredModeBlocks"); pCtx->pIntra4x4PredModeBlocks = NULL; } if (NULL != pCtx->pNonZeroCountBlocks) { pMa->WelsFree (pCtx->pNonZeroCountBlocks, "pNonZeroCountBlocks"); pCtx->pNonZeroCountBlocks = NULL; } if (NULL != pCtx->pMvUnitBlock4x4) { pMa->WelsFree (pCtx->pMvUnitBlock4x4, "pMvUnitBlock4x4"); pCtx->pMvUnitBlock4x4 = NULL; } if (NULL != pCtx->pRefIndexBlock4x4) { pMa->WelsFree (pCtx->pRefIndexBlock4x4, "pRefIndexBlock4x4"); pCtx->pRefIndexBlock4x4 = NULL; } if (NULL != pCtx->ppMbListD) { if (NULL != pCtx->ppMbListD[0]) { pMa->WelsFree (pCtx->ppMbListD[0], "ppMbListD[0]"); (*ppCtx)->ppMbListD[0] = NULL; } pMa->WelsFree (pCtx->ppMbListD, "ppMbListD"); pCtx->ppMbListD = NULL; } if (NULL != pCtx->pSadCostMb) { pMa->WelsFree (pCtx->pSadCostMb, "pSadCostMb"); pCtx->pSadCostMb = NULL; } // SLTRState if (NULL != pCtx->pLtr) { pMa->WelsFree (pCtx->pLtr, "SLTRState"); pCtx->pLtr = NULL; } // pDq layers list ilayer = 0; if (NULL != pCtx->ppDqLayerList && pParam != NULL) { while (ilayer < pParam->iNumDependencyLayer) { SDqLayer* pDq = pCtx->ppDqLayerList[ilayer]; SDLayerParam* pDlp = &pCtx->pSvcParam->sDependencyLayers[ilayer]; const BOOL_T kbIsDynamicSlicing = (SM_DYN_SLICE == pDlp->sMso.uiSliceMode); // pDq layers if (NULL != pDq) { if (NULL != pDq->sLayerInfo.pSliceInLayer) { int32_t iSliceIdx = 0; int32_t iSliceNum = GetInitialSliceNum (pDq->iMbWidth, pDq->iMbHeight, &pDlp->sMso); if (iSliceNum < 1) iSliceNum = 1; while (iSliceIdx < iSliceNum) { SSlice* pSlice = &pDq->sLayerInfo.pSliceInLayer[iSliceIdx]; FreeMbCache (&pSlice->sMbCacheInfo, pMa); ++ iSliceIdx; } pMa->WelsFree (pDq->sLayerInfo.pSliceInLayer, "pSliceInLayer"); pDq->sLayerInfo.pSliceInLayer = NULL; } if (kbIsDynamicSlicing) { pMa->WelsFree (pDq->pNumSliceCodedOfPartition, "pNumSliceCodedOfPartition"); pDq->pNumSliceCodedOfPartition = NULL; pMa->WelsFree (pDq->pLastCodedMbIdxOfPartition, "pLastCodedMbIdxOfPartition"); pDq->pLastCodedMbIdxOfPartition = NULL; pMa->WelsFree (pDq->pLastMbIdxOfPartition, "pLastMbIdxOfPartition"); pDq->pLastMbIdxOfPartition = NULL; } pMa->WelsFree (pDq, "pDq"); pDq = NULL; pCtx->ppDqLayerList[ilayer] = NULL; } ++ ilayer; } pMa->WelsFree (pCtx->ppDqLayerList, "ppDqLayerList"); pCtx->ppDqLayerList = NULL; } FreeSpatialPictures (pCtx); // reference picture list extension if (NULL != pCtx->ppRefPicListExt && pParam != NULL) { ilayer = 0; while (ilayer < pParam->iNumDependencyLayer) { SRefList* pRefList = pCtx->ppRefPicListExt[ilayer]; if (NULL != pRefList) { int32_t iRef = 0; do { if (pRefList->pRef[iRef] != NULL) { FreePicture (pMa, &pRefList->pRef[iRef]); } ++ iRef; } while (iRef < 1 + pParam->iNumRefFrame); pMa->WelsFree (pCtx->ppRefPicListExt[ilayer], "ppRefPicListExt[]"); pCtx->ppRefPicListExt[ilayer] = NULL; } ++ ilayer; } pMa->WelsFree (pCtx->ppRefPicListExt, "ppRefPicListExt"); pCtx->ppRefPicListExt = NULL; } // pSlice context list if (NULL != pCtx->pSliceCtxList && pParam != NULL) { ilayer = 0; while (ilayer < pParam->iNumDependencyLayer) { SSliceCtx* pSliceCtx = &pCtx->pSliceCtxList[ilayer]; if (NULL != pSliceCtx) UninitSlicePEncCtx (pSliceCtx, pMa); ++ ilayer; } pMa->WelsFree (pCtx->pSliceCtxList, "pSliceCtxList"); pCtx->pSliceCtxList = NULL; } // VAA if (NULL != pCtx->pVaa) { if (pCtx->pSvcParam->bEnableAdaptiveQuant) { //free mem pMa->WelsFree (pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureUnit, "pVaa->sAdaptiveQuantParam.pMotionTextureUnit"); pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureUnit = NULL; pMa->WelsFree (pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp, "pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp"); pCtx->pVaa->sAdaptiveQuantParam.pMotionTextureIndexToDeltaQp = NULL; } pMa->WelsFree (pCtx->pVaa->pVaaBackgroundMbFlag, "pVaa->pVaaBackgroundMbFlag"); pCtx->pVaa->pVaaBackgroundMbFlag = NULL; pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSad8x8, "pVaa->sVaaCalcInfo.sad8x8"); pCtx->pVaa->sVaaCalcInfo.pSad8x8 = NULL; pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSsd16x16, "pVaa->sVaaCalcInfo.pSsd16x16"); pCtx->pVaa->sVaaCalcInfo.pSsd16x16 = NULL; pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSum16x16, "pVaa->sVaaCalcInfo.pSum16x16"); pCtx->pVaa->sVaaCalcInfo.pSum16x16 = NULL; pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSumOfSquare16x16, "pVaa->sVaaCalcInfo.pSumOfSquare16x16"); pCtx->pVaa->sVaaCalcInfo.pSumOfSquare16x16 = NULL; if (pCtx->pSvcParam->bEnableBackgroundDetection) { //BGD control pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pSumOfDiff8x8, "pVaa->sVaaCalcInfo.pSumOfDiff8x8"); pCtx->pVaa->sVaaCalcInfo.pSumOfDiff8x8 = NULL; pMa->WelsFree (pCtx->pVaa->sVaaCalcInfo.pMad8x8, "pVaa->sVaaCalcInfo.pMad8x8"); pCtx->pVaa->sVaaCalcInfo.pMad8x8 = NULL; } pMa->WelsFree (pCtx->pVaa, "pVaa"); pCtx->pVaa = NULL; } WelsRcFreeMemory (pCtx); // rate control module memory free if (NULL != pCtx->pWelsSvcRc) { pMa->WelsFree (pCtx->pWelsSvcRc, "pWelsSvcRc"); pCtx->pWelsSvcRc = NULL; } /* MVD cost tables for Inter */ if (NULL != pCtx->pMvdCostTableInter) { pMa->WelsFree (pCtx->pMvdCostTableInter, "pMvdCostTableInter"); pCtx->pMvdCostTableInter = NULL; } #ifdef ENABLE_TRACE_FILE if (NULL != pCtx->pFileLog) { fclose (pCtx->pFileLog); pCtx->pFileLog = NULL; } pCtx->uiSizeLog = 0; #endif//ENABLE_TRACE_FILE FreeCodingParam (&pCtx->pSvcParam, pMa); if (NULL != pCtx->pFuncList) { pMa->WelsFree (pCtx->pFuncList, "SWelsFuncPtrList"); pCtx->pFuncList = NULL; } #if defined(MEMORY_MONITOR) assert (pMa->WelsGetMemoryUsage() == 0); // ensure all memory free well #endif//MEMORY_MONITOR if ((*ppCtx)->pMemAlign != NULL) { WelsLog (NULL, WELS_LOG_INFO, "FreeMemorySvc(), verify memory usage (%d bytes) after free..\n", (*ppCtx)->pMemAlign->WelsGetMemoryUsage()); delete (*ppCtx)->pMemAlign; (*ppCtx)->pMemAlign = NULL; } free (*ppCtx); *ppCtx = NULL; } } int32_t InitSliceSettings (SWelsSvcCodingParam* pCodingParam, const int32_t kiCpuCores, int16_t* pMaxSliceCount) { int32_t iSpatialIdx = 0, iSpatialNum = pCodingParam->iNumDependencyLayer; int16_t iMaxSliceCount = 0; do { SDLayerParam* pDlp = &pCodingParam->sDependencyLayers[iSpatialIdx]; SMulSliceOption* pMso = &pDlp->sMso; SSliceArgument* pSlcArg = &pMso->sSliceArgument; const int32_t kiMbWidth = (pDlp->iFrameWidth + 15) >> 4; const int32_t kiMbHeight = (pDlp->iFrameHeight + 15) >> 4; const int32_t kiMbNumInFrame = kiMbWidth * kiMbHeight; #if defined(MT_ENABLED) #if defined(DYNAMIC_SLICE_ASSIGN) int32_t iSliceNum = (SM_FIXEDSLCNUM_SLICE == pMso->uiSliceMode || SM_DYN_SLICE == pMso->uiSliceMode) ? kiCpuCores : pSlcArg->iSliceNum; // uiSliceNum per input has been validated at ParamValidationExt() #else//!DYNAMIC_SLICE_ASSIGN int32_t iSliceNum = (SM_DYN_SLICE == pMso->uiSliceMode) ? kiCpuCores : pSlcArg->uiSliceNum; // uiSliceNum per input has been validated at ParamValidationExt() #endif//DYNAMIC_SLICE_ASSIGN #else//!MT_ENABLED int16_t iSliceNum = pSlcArg->iSliceNum; // uiSliceNum per input has been validated at ParamValidationExt() #endif//MT_ENABLED // NOTE: Per design, in case MT/DYNAMIC_SLICE_ASSIGN enabled, for SM_FIXEDSLCNUM_SLICE mode, // uiSliceNum of current spatial layer settings equals to uiCpuCores number; SM_DYN_SLICE mode, // uiSliceNum intials as uiCpuCores also, stay tuned dynamically slicing in future pSlcArg->iSliceNum = iSliceNum; // used fixed one switch (pMso->uiSliceMode) { case SM_DYN_SLICE: iMaxSliceCount = AVERSLICENUM_CONSTRAINT; //#ifndef MT_ENABLED break; // go through for MT_ENABLED & SM_DYN_SLICE? //#endif//MT_ENABLED case SM_FIXEDSLCNUM_SLICE: if (iSliceNum > iMaxSliceCount) iMaxSliceCount = iSliceNum; // need perform check due uiSliceNum might change, although has been initialized somewhere outside if (pCodingParam->bEnableRc) { GomValidCheckSliceMbNum (kiMbWidth, kiMbHeight, pSlcArg); } else { CheckFixedSliceNumMultiSliceSetting (kiMbNumInFrame, pSlcArg); } break; case SM_SINGLE_SLICE: if (iSliceNum > iMaxSliceCount) iMaxSliceCount = iSliceNum; break; case SM_RASTER_SLICE: if (iSliceNum > iMaxSliceCount) iMaxSliceCount = iSliceNum; break; case SM_ROWMB_SLICE: if (iSliceNum > iMaxSliceCount) iMaxSliceCount = iSliceNum; break; default: break; } ++ iSpatialIdx; } while (iSpatialIdx < iSpatialNum); #ifdef MT_ENABLED pCodingParam->iCountThreadsNum = WELS_MIN (kiCpuCores, iMaxSliceCount); pCodingParam->iMultipleThreadIdc = pCodingParam->iCountThreadsNum; #else pCodingParam->iMultipleThreadIdc = 1; pCodingParam->iCountThreadsNum = 1; #endif//MT_ENABLED #ifndef WELS_TESTBED // for product release and non-SGE testing if (kiCpuCores < 2) { // single CPU core, make no sense for MT parallelization pCodingParam->iMultipleThreadIdc = 1; pCodingParam->iCountThreadsNum = 1; } #endif *pMaxSliceCount = iMaxSliceCount; return 0; } /*! * \brief log output for cpu features/capabilities */ void OutputCpuFeaturesLog (uint32_t uiCpuFeatureFlags, uint32_t uiCpuCores, int32_t iCacheLineSize) { // welstracer output WelsLog (NULL, WELS_LOG_INFO, "WELS CPU features/capacities (0x%x) detected: \t" \ "HTT: %c, " \ "MMX: %c, " \ "MMXEX: %c, " \ "SSE: %c, " \ "SSE2: %c, " \ "SSE3: %c, " \ "SSSE3: %c, " \ "SSE4.1: %c, " \ "SSE4.2: %c, " \ "AVX: %c, " \ "FMA: %c, " \ "X87-FPU: %c, " \ "3DNOW: %c, " \ "3DNOWEX: %c, " \ "ALTIVEC: %c, " \ "CMOV: %c, " \ "MOVBE: %c, " \ "AES: %c, " \ "NUMBER OF LOGIC PROCESSORS ON CHIP: %d, " \ "CPU CACHE LINE SIZE (BYTES): %d\n", uiCpuFeatureFlags, (uiCpuFeatureFlags & WELS_CPU_HTT) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_MMX) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_MMXEXT) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE2) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE3) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSSE3) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE41) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE42) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_AVX) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_FMA) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_FPU) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_3DNOW) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_3DNOWEXT) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_ALTIVEC) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_CMOV) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_MOVBE) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_AES) ? 'Y' : 'N', uiCpuCores, iCacheLineSize); #ifdef _DEBUG // output at console & _debug fprintf (stderr, "WELS CPU features/capacities (0x%x) detected: \n" \ "HTT: %c, " \ "MMX: %c, " \ "MMXEX: %c, " \ "SSE: %c, " \ "SSE2: %c, " \ "SSE3: %c, " \ "SSSE3: %c, " \ "SSE4.1: %c, " \ "SSE4.2: %c, " \ "AVX: %c, " \ "FMA: %c, " \ "X87-FPU: %c, " \ "3DNOW: %c, " \ "3DNOWEX: %c, " \ "ALTIVEC: %c, " \ "CMOV: %c, " \ "MOVBE: %c, " \ "AES: %c, " \ "NUMBER OF LOGIC PROCESSORS ON CHIP: %d, " \ "CPU CACHE LINE SIZE (BYTES): %d\n", uiCpuFeatureFlags, (uiCpuFeatureFlags & WELS_CPU_HTT) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_MMX) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_MMXEXT) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE2) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE3) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSSE3) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE41) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_SSE42) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_AVX) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_FMA) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_FPU) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_3DNOW) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_3DNOWEXT) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_ALTIVEC) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_CMOV) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_MOVBE) ? 'Y' : 'N', (uiCpuFeatureFlags & WELS_CPU_AES) ? 'Y' : 'N', uiCpuCores, iCacheLineSize); #endif//_DEBUG } /*! * \brief initialize Wels avc encoder core library * \pParam ppCtx sWelsEncCtx** * \pParam pParam SWelsSvcCodingParam* * \return successful - 0; otherwise none 0 for failed */ int32_t WelsInitEncoderExt (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pCodingParam) { sWelsEncCtx* pCtx = NULL; int32_t iRet = 0; uint32_t uiCpuFeatureFlags = 0; // CPU features int32_t uiCpuCores = 1; // number of logic processors on physical processor package, one logic processor means HTT not supported int32_t iCacheLineSize = 16; // on chip cache line size in byte int16_t iSliceNum = 1; // number of slices used if (NULL == ppCtx || NULL == pCodingParam) { WelsLog (NULL, WELS_LOG_ERROR, "WelsInitEncoderExt(), NULL == ppCtx(0x%p) or NULL == pCodingParam(0x%p).\n", (void*)ppCtx, (void*)pCodingParam); return 1; } iRet = ParamValidationExt (pCodingParam); if (iRet != 0) { WelsLog (NULL, WELS_LOG_ERROR, "WelsInitEncoderExt(), ParamValidationExt failed return %d.\n", iRet); return iRet; } // for cpu features detection, Only detect once?? #ifdef X86_ASM uiCpuFeatureFlags = WelsCPUFeatureDetect (&uiCpuCores); // detect cpu capacity features if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_128) iCacheLineSize = 128; else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_64) iCacheLineSize = 64; else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_32) iCacheLineSize = 32; else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_16) iCacheLineSize = 16; OutputCpuFeaturesLog (uiCpuFeatureFlags, uiCpuCores, iCacheLineSize); #else iCacheLineSize = 16; // 16 bytes aligned in default #endif//X86_ASM #ifndef WELS_TESTBED #if defined(MT_ENABLED) && defined(DYNAMIC_DETECT_CPU_CORES) if (pCodingParam->iMultipleThreadIdc > 0) uiCpuCores = pCodingParam->iMultipleThreadIdc; else { if (uiCpuFeatureFlags == 0) // cpuid not supported, use high level system API as followed to detect number of pysical/logic processor uiCpuCores = DynamicDetectCpuCores(); // So far so many cpu cores up to MAX_THREADS_NUM mean for server platforms, // for client application here it is constrained by maximal to MAX_THREADS_NUM if (uiCpuCores > MAX_THREADS_NUM) // MAX_THREADS_NUM uiCpuCores = MAX_THREADS_NUM; // MAX_THREADS_NUM else if (uiCpuCores < 1) // just for safe uiCpuCores = 1; } #endif//MT_ENABLED && DYNAMIC_DETECT_CPU_CORES #else//WELS_TESTBED uiCpuCores = pCodingParam->iMultipleThreadIdc; // assigned uiCpuCores from iMultipleThreadIdc from SGE testing #endif//WELS_TESTBED uiCpuCores = WELS_CLIP3 (uiCpuCores, 1, MAX_THREADS_NUM); if (InitSliceSettings (pCodingParam, uiCpuCores, &iSliceNum)) { WelsLog (NULL, WELS_LOG_ERROR, "WelsInitEncoderExt(), InitSliceSettings failed.\n"); return 1; } *ppCtx = NULL; pCtx = static_cast<sWelsEncCtx*> (malloc (sizeof (sWelsEncCtx))); WELS_VERIFY_RETURN_IF (1, (NULL == pCtx)) memset (pCtx, 0, sizeof (sWelsEncCtx)); pCtx->pMemAlign = new CMemoryAlign (iCacheLineSize); WELS_VERIFY_RETURN_PROC_IF (1, (NULL == pCtx->pMemAlign), FreeMemorySvc (&pCtx)) // for logs #ifdef ENABLE_TRACE_FILE if (wlog == WelsLogDefault) { str_t fname[MAX_FNAME_LEN] = {0}; #if defined (_MSC_VER) #if _MSC_VER>=1500 SNPRINTF (fname, MAX_FNAME_LEN, MAX_FNAME_LEN, "%swels_svc_encoder_trace.txt", pCodingParam->sTracePath); // confirmed_safe_unsafe_usage #else SNPRINTF (fname, MAX_FNAME_LEN, "%swels_svc_encoder_trace.txt", pCodingParam->sTracePath); // confirmed_safe_unsafe_usage #endif//_MSC_VER>=1500 #else //GNUC/ SNPRINTF (fname, MAX_FNAME_LEN, "%swels_svc_encoder_trace.txt", pCodingParam->sTracePath); // confirmed_safe_unsafe_usage #endif//_MSC_VER #if defined(__GNUC__) pCtx->pFileLog = FOPEN (fname, "wt+"); #else//WIN32 #if defined(_WIN32) && defined(_MSC_VER) #if _MSC_VER >= 1500 FOPEN (&pCtx->pFileLog, fname, "wt+"); #else pCtx->pFileLog = FOPEN (fname, "wt+"); #endif//_MSC_VER>=1500 #endif//WIN32 && _MSC_VER #endif//__GNUC__ pCtx->uiSizeLog = 0; } #endif//ENABLE_TRACE_FILE pCodingParam->DetermineTemporalSettings(); iRet = AllocCodingParam (&pCtx->pSvcParam, pCtx->pMemAlign, pCodingParam->iNumDependencyLayer); if (iRet != 0) { FreeMemorySvc (&pCtx); return iRet; } memcpy (pCtx->pSvcParam, pCodingParam, sizeof (SWelsSvcCodingParam)); // confirmed_safe_unsafe_usage pCtx->pFuncList = (SWelsFuncPtrList*)pCtx->pMemAlign->WelsMalloc (sizeof (SWelsFuncPtrList), "SWelsFuncPtrList"); if (NULL == pCtx->pFuncList) { FreeMemorySvc (&pCtx); return 1; } InitFunctionPointers (pCtx->pFuncList, pCtx->pSvcParam, uiCpuFeatureFlags); pCtx->iActiveThreadsNum = pCodingParam->iCountThreadsNum; pCtx->iMaxSliceCount = iSliceNum; iRet = RequestMemorySvc (&pCtx); if (iRet != 0) { WelsLog (pCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), RequestMemorySvc failed return %d.\n", iRet); FreeMemorySvc (&pCtx); return iRet; } #ifdef MT_ENABLED if (pCodingParam->iMultipleThreadIdc > 1) iRet = CreateSliceThreads (pCtx); #endif WelsRcInitModule (pCtx, pCtx->pSvcParam->bEnableRc ? WELS_RC_GOM : WELS_RC_DISABLE); pCtx->pVpp = new CWelsPreProcess ((void*)pCtx); if (pCtx->pVpp == NULL) { WelsLog (pCtx, WELS_LOG_ERROR, "WelsInitEncoderExt(), pOut of memory in case new CWelsPreProcess().\n"); FreeMemorySvc (&pCtx); return iRet; } #if defined(MEMORY_MONITOR) WelsLog (pCtx, WELS_LOG_INFO, "WelsInitEncoderExt() exit, overall memory usage: %llu bytes\n", static_cast<unsigned long long> (sizeof (sWelsEncCtx) /* requested size from malloc() or new operator */ + pCtx->pMemAlign->WelsGetMemoryUsage()) /* requested size from CMemoryAlign::WelsMalloc() */ ); #endif//MEMORY_MONITOR *ppCtx = pCtx; WelsLog (pCtx, WELS_LOG_DEBUG, "WelsInitEncoderExt(), pCtx= 0x%p.\n", (void*)pCtx); return 0; } /* * * status information output */ #if defined(STAT_OUTPUT) void StatOverallEncodingExt (sWelsEncCtx* pCtx) { int8_t i = 0; int8_t j = 0; for (i = 0; i < pCtx->pSvcParam->iNumDependencyLayer; i++) { fprintf (stdout, "\nDependency layer : %d\n", i); fprintf (stdout, "Quality layer : %d\n", j); { const int32_t iCount = pCtx->sStatData[i][j].sSliceData.iSliceCount[I_SLICE] + pCtx->sStatData[i][j].sSliceData.iSliceCount[P_SLICE] + pCtx->sStatData[i][j].sSliceData.iSliceCount[B_SLICE]; #if defined(MB_TYPES_CHECK) if (iCount > 0) { int32_t iCountNumIMb = pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra4x4] + pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra16x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][7]; int32_t iCountNumPMb = pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra4x4] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra16x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][7] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][PSkip]; int32_t count_p_mbL0 = pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10]; int32_t iMbCount = iCountNumIMb + iCountNumPMb; if (iMbCount > 0) { fprintf (stderr, "SVC: overall Slices MBs: %d Avg\nI4x4: %.3f%% I16x16: %.3f%% IBL: %.3f%%\nP16x16: %.3f%% P16x8: %.3f%% P8x16: %.3f%% P8x8: %.3f%% SUBP8x8: %.3f%% PSKIP: %.3f%%\nILP(All): %.3f%% ILP(PL0): %.3f%% BLSKIP(PL0): %.3f%% RP(PL0): %.3f%%\n", iMbCount, (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra4x4] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra4x4]) / iMbCount), (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][Intra16x16] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Intra16x16]) / iMbCount), (100.0f * (pCtx->sStatData[i][j].sSliceData.iMbCount[I_SLICE][7] + pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][7]) / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x16] / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter16x8] / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x16] / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][Inter8x8] / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][10] / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][PSkip] / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][11] / iMbCount), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][11] / count_p_mbL0), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][8] / count_p_mbL0), (100.0f * pCtx->sStatData[i][j].sSliceData.iMbCount[P_SLICE][9] / count_p_mbL0) ); } } #endif //#if defined(MB_TYPES_CHECK) if (iCount > 0) { fprintf (stdout, "SVC: overall PSNR Y: %2.3f U: %2.3f V: %2.3f kb/s: %.1f fps: %.3f\n\n", (pCtx->sStatData[i][j].sQualityStat.rYPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rYPsnr[P_SLICE] + pCtx->sStatData[i][j].sQualityStat.rYPsnr[B_SLICE]) / (float) (iCount), (pCtx->sStatData[i][j].sQualityStat.rUPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rUPsnr[P_SLICE] + pCtx->sStatData[i][j].sQualityStat.rUPsnr[B_SLICE]) / (float) (iCount), (pCtx->sStatData[i][j].sQualityStat.rVPsnr[I_SLICE] + pCtx->sStatData[i][j].sQualityStat.rVPsnr[P_SLICE] + pCtx->sStatData[i][j].sQualityStat.rVPsnr[B_SLICE]) / (float) (iCount), 1.0f * pCtx->pSvcParam->sDependencyLayers[i].fOutputFrameRate * (pCtx->sStatData[i][j].sSliceData.iSliceSize[I_SLICE] + pCtx->sStatData[i][j].sSliceData.iSliceSize[P_SLICE] + pCtx->sStatData[i][j].sSliceData.iSliceSize[B_SLICE]) / (float) ( iCount + pCtx->pWelsSvcRc[i].iSkipFrameNum) / 1000, 1.0f * pCtx->pSvcParam->sDependencyLayers[i].fOutputFrameRate); } } } } #endif /*! * \brief uninitialize Wels encoder core library * \pParam pEncCtx sWelsEncCtx* * \return none */ void WelsUninitEncoderExt (sWelsEncCtx** ppCtx) { if (NULL == ppCtx || NULL == *ppCtx) return; WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pCtx= %p, iThreadCount= %d, iMultipleThreadIdc= %d.\n", (void*) (*ppCtx), (*ppCtx)->pSvcParam->iCountThreadsNum, (*ppCtx)->pSvcParam->iMultipleThreadIdc); #if defined(STAT_OUTPUT) StatOverallEncodingExt (*ppCtx); #endif #if defined(MT_ENABLED) if ((*ppCtx)->pSvcParam->iMultipleThreadIdc > 1 && (*ppCtx)->pSliceThreading != NULL) { const int32_t iThreadCount = (*ppCtx)->pSvcParam->iCountThreadsNum; int32_t iThreadIdx = 0; #if defined(_WIN32) if ((*ppCtx)->pSliceThreading->pExitEncodeEvent != NULL) { do { if ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] != NULL) // iThreadIdx is already created successfully WelsEventSignal (& (*ppCtx)->pSliceThreading->pExitEncodeEvent[iThreadIdx]); ++ iThreadIdx; } while (iThreadIdx < iThreadCount); WelsMultipleEventsWaitAllBlocking (iThreadCount, & (*ppCtx)->pSliceThreading->pFinSliceCodingEvent[0]); } #elif defined(__GNUC__) while (iThreadIdx < iThreadCount) { int res = 0; if ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]) { res = WelsThreadCancel ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]); WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), WelsThreadCancel(pThreadHandles%d) return %d..\n", iThreadIdx, res); res = WelsThreadJoin ((*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx]); // waiting thread exit WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pthread_join(pThreadHandles%d) return %d..\n", iThreadIdx, res); (*ppCtx)->pSliceThreading->pThreadHandles[iThreadIdx] = 0; } #if defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE) if ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]) { res = WelsThreadCancel ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]); WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), WelsThreadCancel(pUpdateMbListThrdHandles%d) return %d..\n", iThreadIdx, res); res = WelsThreadJoin ((*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx]); // waiting thread exit WelsLog (*ppCtx, WELS_LOG_INFO, "WelsUninitEncoderExt(), pthread_join(pUpdateMbListThrdHandles%d) return %d..\n", iThreadIdx, res); (*ppCtx)->pSliceThreading->pUpdateMbListThrdHandles[iThreadIdx] = 0; } #endif//DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE ++ iThreadIdx; } #endif//WIN32 } #endif//MT_ENABLED if ((*ppCtx)->pVpp) { delete (*ppCtx)->pVpp; (*ppCtx)->pVpp = NULL; } FreeMemorySvc (ppCtx); *ppCtx = NULL; } /*! * \brief get temporal level due to configuration and coding context */ static inline int32_t GetTemporalLevel (SDLayerParam* fDlp, const int32_t kiFrameNum, const int32_t kiGopSize) { const int32_t kiCodingIdx = kiFrameNum & (kiGopSize - 1); return fDlp->uiCodingIdx2TemporalId[kiCodingIdx]; } void DynslcUpdateMbNeighbourInfoListForAllSlices (SSliceCtx* pSliceCtx, SMB* pMbList) { const int32_t kiMbWidth = pSliceCtx->iMbWidth; const int32_t kiEndMbInSlice = pSliceCtx->iMbNumInFrame - 1; int32_t iIdx = 0; do { SMB* pMb = &pMbList[iIdx]; uint32_t uiNeighborAvailFlag = 0; const int32_t kiMbXY = pMb->iMbXY; const int32_t kiMbX = pMb->iMbX; const int32_t kiMbY = pMb->iMbY; BOOL_T bLeft; BOOL_T bTop; BOOL_T bLeftTop; BOOL_T bRightTop; int32_t uiSliceIdc; int32_t iLeftXY, iTopXY, iLeftTopXY, iRightTopXY; uiSliceIdc = WelsMbToSliceIdc (pSliceCtx, kiMbXY); pMb->uiSliceIdc = uiSliceIdc; iLeftXY = kiMbXY - 1; iTopXY = kiMbXY - kiMbWidth; iLeftTopXY = iTopXY - 1; iRightTopXY = iTopXY + 1; bLeft = (kiMbX > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iLeftXY)); bTop = (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iTopXY)); bLeftTop = (kiMbX > 0) && (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iLeftTopXY)); bRightTop = (kiMbX < (kiMbWidth - 1)) && (kiMbY > 0) && (uiSliceIdc == WelsMbToSliceIdc (pSliceCtx, iRightTopXY)); if (bLeft) { uiNeighborAvailFlag |= LEFT_MB_POS; } if (bTop) { uiNeighborAvailFlag |= TOP_MB_POS; } if (bLeftTop) { uiNeighborAvailFlag |= TOPLEFT_MB_POS; } if (bRightTop) { uiNeighborAvailFlag |= TOPRIGHT_MB_POS; } pMb->uiNeighborAvail = (uint8_t)uiNeighborAvailFlag; ++ iIdx; } while (iIdx <= kiEndMbInSlice); } /* * TUNE back if number of picture partition decision algorithm based on past if available */ int32_t PicPartitionNumDecision (sWelsEncCtx* pCtx) { int32_t iPartitionNum = 1; #ifdef MT_ENABLED if (pCtx->pSvcParam->iMultipleThreadIdc > 1) { iPartitionNum = pCtx->pSvcParam->iCountThreadsNum; #if !defined(FIXED_PARTITION_ASSIGN) if (P_SLICE == pCtx->eSliceType) iPartitionNum = 1; #endif//!FIXED_PARTITION_ASSIGN } return iPartitionNum; #else return iPartitionNum; #endif//MT_ENABLED } #if defined(MT_ENABLED) void WelsInitCurrentQBLayerMltslc (sWelsEncCtx* pCtx) { //pData init SDqLayer* pCurDq = pCtx->pCurDqLayer; SSliceCtx* pSliceCtx = (pCurDq->pSliceEncCtx); //mb_neighbor DynslcUpdateMbNeighbourInfoListForAllSlices (pSliceCtx, pCurDq->sMbDataP); } void UpdateSlicepEncCtxWithPartition (SSliceCtx* pSliceCtx, int32_t iPartitionNum) { const int32_t kiMbNumInFrame = pSliceCtx->iMbNumInFrame; int32_t iCountMbNumPerPartition = kiMbNumInFrame; int32_t iAssignableMbLeft = kiMbNumInFrame; int32_t iFirstMbIdx = 0; int32_t i/*, j*/; if (iPartitionNum <= 0) iPartitionNum = 1; else if (iPartitionNum > AVERSLICENUM_CONSTRAINT) iPartitionNum = AVERSLICENUM_CONSTRAINT; // AVERSLICENUM_CONSTRAINT might be variable, however not fixed by MACRO iCountMbNumPerPartition /= iPartitionNum; pSliceCtx->iSliceNumInFrame = iPartitionNum; i = 0; while (i < iPartitionNum) { if (i + 1 == iPartitionNum) { pSliceCtx->pCountMbNumInSlice[i] = iAssignableMbLeft; } else { pSliceCtx->pCountMbNumInSlice[i] = iCountMbNumPerPartition; } pSliceCtx->pFirstMbInSlice[i] = iFirstMbIdx; memset (pSliceCtx->pOverallMbMap + iFirstMbIdx, (uint8_t)i, pSliceCtx->pCountMbNumInSlice[i]*sizeof (uint8_t)); // for next partition(or pSlice) iFirstMbIdx += pSliceCtx->pCountMbNumInSlice[i]; iAssignableMbLeft -= pSliceCtx->pCountMbNumInSlice[i]; ++ i; } } void WelsInitCurrentDlayerMltslc (sWelsEncCtx* pCtx, int32_t iPartitionNum) { SDqLayer* pCurDq = pCtx->pCurDqLayer; SSliceCtx* pSliceCtx = pCurDq->pSliceEncCtx; UpdateSlicepEncCtxWithPartition (pSliceCtx, iPartitionNum); if (I_SLICE == pCtx->eSliceType) { //check if uiSliceSizeConstraint too small #define byte_complexIMBat26 (60) uint8_t iCurDid = pCtx->uiDependencyId; uint32_t uiFrmByte = 0; if (pCtx->pSvcParam->bEnableRc) { //RC case uiFrmByte = ( ((uint32_t) (pCtx->pSvcParam->sDependencyLayers[iCurDid].iSpatialBitrate) / (uint32_t) (pCtx->pSvcParam->sDependencyLayers[iCurDid].fInputFrameRate)) >> 3); } else { //fixed QP case const int32_t iTtlMbNumInFrame = pSliceCtx->iMbNumInFrame; int32_t iQDeltaTo26 = (26 - pCtx->pSvcParam->sDependencyLayers[iCurDid].iDLayerQp); uiFrmByte = (iTtlMbNumInFrame * byte_complexIMBat26); if (iQDeltaTo26 > 0) { //smaller QP than 26 uiFrmByte = (uint32_t) (uiFrmByte * ((float)iQDeltaTo26 / 4)); } else if (iQDeltaTo26 < 0) { //larger QP than 26 iQDeltaTo26 = ((-iQDeltaTo26) >> 2); //delta mod 4 uiFrmByte = (uiFrmByte >> (iQDeltaTo26)); //if delta 4, byte /2 } } //MINPACKETSIZE_CONSTRAINT if (pSliceCtx->uiSliceSizeConstraint < (uint32_t) (uiFrmByte//suppose 16 byte per mb at average / (pSliceCtx->iMaxSliceNumConstraint)) ) { WelsLog (pCtx, WELS_LOG_WARNING, "Set-SliceConstraint(%d) too small for current resolution (MB# %d) under QP/BR!\n", pSliceCtx->uiSliceSizeConstraint, pSliceCtx->iMbNumInFrame ); } } WelsInitCurrentQBLayerMltslc (pCtx); } #else void WelsInitCurrentQBLayerMltslc (sWelsEncCtx* pCtx) { //pData init SDqLayer* pCurDq = pCtx->pCurDqLayer; SSliceCtx* pSliceCtx = (pCurDq->pSliceEncCtx); SSlice* pSlice = &pCurDq->sLayerInfo.pSliceInLayer[0]; int32_t iTtlMbNumInFrame = pSliceCtx->iMbNumInFrame; //pSliceCtx memset (pSliceCtx->pOverallMbMap, 0, iTtlMbNumInFrame * sizeof (uint8_t)); memset (pSliceCtx->pCountMbNumInSlice, 0, pSliceCtx->iSliceNumInFrame * sizeof (int32_t)); memset (pSliceCtx->pFirstMbInSlice, 0, pSliceCtx->iSliceNumInFrame * sizeof (int16_t)); pSliceCtx->iSliceNumInFrame = 1;// pSliceCtx->pCountMbNumInSlice[0] = iTtlMbNumInFrame; //mb_neighbor DynslcUpdateMbNeighbourInfoListForAllSlices (pSliceCtx, pCurDq->sMbDataP); //pSlice init pSlice->uiSliceIdx = 0; pSlice->pSliceBsa = &pCtx->pOut->sBsWrite; pSlice->bDynamicSlicingSliceSizeCtrlFlag = false; pSlice->uiAssumeLog2BytePerMb = (pCtx->eSliceType == P_SLICE) ? 0 : 1; } void WelsInitCurrentDlayerMltslc (sWelsEncCtx* pCtx, int32_t iPartitionNum) { SDqLayer* pCurDq = pCtx->pCurDqLayer; SSliceCtx* pSliceCtx = (pCurDq->pSliceEncCtx); int32_t iTtlMbNumInFrame = pCurDq->iMbHeight * pCurDq->iMbWidth; pSliceCtx->iMbNumInFrame = pSliceCtx->pCountMbNumInSlice[0] = iTtlMbNumInFrame; if (I_SLICE == pCtx->eSliceType) { //check if uiSliceSizeConstraint too small #define byte_complexIMBat26 (60) uint8_t iCurDid = pCtx->uiDependencyId; uint32_t uiFrmByte = 0; if (pCtx->pSvcParam->bEnableRc) { //RC case uiFrmByte = ( ((uint32_t) (pCtx->pSvcParam->sDependencyLayers[iCurDid].iSpatialBitrate) / (uint32_t) (pCtx->pSvcParam->sDependencyLayers[iCurDid].fInputFrameRate)) >> 3); } else { //fixed QP case int32_t iQDeltaTo26 = (26 - pCtx->pSvcParam->sDependencyLayers[iCurDid].iDLayerQp); uiFrmByte = (iTtlMbNumInFrame * byte_complexIMBat26); if (iQDeltaTo26 > 0) { //smaller QP than 26 uiFrmByte = (uint32_t) (uiFrmByte * ((float)iQDeltaTo26 / 4)); } else if (iQDeltaTo26 < 0) { //larger QP than 26 iQDeltaTo26 = ((-iQDeltaTo26) >> 2); //delta mod 4 uiFrmByte = (uiFrmByte >> (iQDeltaTo26)); //if delta 4, byte /2 } } //MINPACKETSIZE_CONSTRAINT if (pSliceCtx->uiSliceSizeConstraint < (uint32_t) (uiFrmByte//suppose 16 byte per mb at average / (pSliceCtx->iMaxSliceNumConstraint)) ) { WelsLog (pCtx, WELS_LOG_WARNING, "Set-SliceConstraint(%d) too small for current resolution (MB# %d) under QP/BR!\n", pSliceCtx->uiSliceSizeConstraint, pSliceCtx->iMbNumInFrame ); } } WelsInitCurrentQBLayerMltslc (pCtx); } #endif /*! * \brief initialize current layer */ void WelsInitCurrentLayer (sWelsEncCtx* pCtx, const int32_t kiWidth, const int32_t kiHeight) { SWelsSvcCodingParam* pParam = pCtx->pSvcParam; SPicture* pEncPic = pCtx->pEncPic; SPicture* pDecPic = pCtx->pDecPic; SDqLayer* pCurDq = pCtx->pCurDqLayer; SSlice* pBaseSlice = &pCurDq->sLayerInfo.pSliceInLayer[0]; SSlice* pSlice = NULL; const uint8_t kiCurDid = pCtx->uiDependencyId; const bool_t kbUseSubsetSpsFlag = (kiCurDid > BASE_DEPENDENCY_ID); SDLayerParam* fDlp = &pParam->sDependencyLayers[kiCurDid]; SNalUnitHeaderExt* pNalHdExt = &pCurDq->sLayerInfo.sNalHeaderExt; SNalUnitHeader* pNalHd = &pNalHdExt->sNalHeader; SDqIdc* pDqIdc = &pCtx->pDqIdcMap[kiCurDid]; int32_t iIdx = 0; int32_t iSliceCount = 0; if (NULL == pCurDq) return; pCurDq->pDecPic = pDecPic; if (fDlp->sMso.uiSliceMode == SM_DYN_SLICE) // need get extra slices for update iSliceCount = GetInitialSliceNum (pCurDq->iMbWidth, pCurDq->iMbHeight, &fDlp->sMso); else iSliceCount = GetCurrentSliceNum (pCurDq->pSliceEncCtx); assert (iSliceCount > 0); pBaseSlice->sSliceHeaderExt.sSliceHeader.iPpsId = pDqIdc->iPpsId; pCurDq->sLayerInfo.pPpsP = pBaseSlice->sSliceHeaderExt.sSliceHeader.pPps = &pCtx->pPPSArray[pBaseSlice->sSliceHeaderExt.sSliceHeader.iPpsId]; pBaseSlice->sSliceHeaderExt.sSliceHeader.iSpsId = pDqIdc->iSpsId; if (kbUseSubsetSpsFlag) { pCurDq->sLayerInfo.pSubsetSpsP = &pCtx->pSubsetArray[pDqIdc->iSpsId]; pCurDq->sLayerInfo.pSpsP = pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps = &pCurDq->sLayerInfo.pSubsetSpsP->pSps; } else { pCurDq->sLayerInfo.pSubsetSpsP = NULL; pCurDq->sLayerInfo.pSpsP = pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps = &pCtx->pSpsArray[pBaseSlice->sSliceHeaderExt.sSliceHeader.iSpsId]; } pSlice = pBaseSlice; iIdx = 1; while (iIdx < iSliceCount) { ++ pSlice; pSlice->sSliceHeaderExt.sSliceHeader.iPpsId = pBaseSlice->sSliceHeaderExt.sSliceHeader.iPpsId; pSlice->sSliceHeaderExt.sSliceHeader.pPps = pBaseSlice->sSliceHeaderExt.sSliceHeader.pPps; pSlice->sSliceHeaderExt.sSliceHeader.iSpsId = pBaseSlice->sSliceHeaderExt.sSliceHeader.iSpsId; pSlice->sSliceHeaderExt.sSliceHeader.pSps = pBaseSlice->sSliceHeaderExt.sSliceHeader.pSps; ++ iIdx; } memset (pNalHdExt, 0, sizeof (SNalUnitHeaderExt)); pNalHd->uiNalRefIdc = pCtx->eNalPriority; pNalHd->eNalUnitType = pCtx->eNalType; pNalHdExt->uiDependencyId = kiCurDid; pNalHdExt->bDiscardableFlag = (pCtx->bNeedPrefixNalFlag) ? (pNalHd->uiNalRefIdc == NRI_PRI_LOWEST) : false; pNalHdExt->bIdrFlag = (pCtx->iFrameNum == 0) && ((pCtx->eNalType == NAL_UNIT_CODED_SLICE_IDR) || (pCtx->eSliceType == I_SLICE)); pNalHdExt->uiTemporalId = pCtx->uiTemporalId; pBaseSlice->bSliceHeaderExtFlag = (NAL_UNIT_CODED_SLICE_EXT == pNalHd->eNalUnitType); pSlice = pBaseSlice; iIdx = 1; while (iIdx < iSliceCount) { ++ pSlice; pSlice->bSliceHeaderExtFlag = pBaseSlice->bSliceHeaderExtFlag; ++ iIdx; } // pEncPic pData pCurDq->pEncData[0] = pEncPic->pData[0]; pCurDq->pEncData[1] = pEncPic->pData[1]; pCurDq->pEncData[2] = pEncPic->pData[2]; pCurDq->iEncStride[0] = pEncPic->iLineSize[0]; pCurDq->iEncStride[1] = pEncPic->iLineSize[1]; pCurDq->iEncStride[2] = pEncPic->iLineSize[2]; // cs pData pCurDq->pCsData[0] = pDecPic->pData[0]; pCurDq->pCsData[1] = pDecPic->pData[1]; pCurDq->pCsData[2] = pDecPic->pData[2]; pCurDq->iCsStride[0] = pDecPic->iLineSize[0]; pCurDq->iCsStride[1] = pDecPic->iLineSize[1]; pCurDq->iCsStride[2] = pDecPic->iLineSize[2]; if (pCurDq->pRefLayer != NULL) { pCurDq->bBaseLayerAvailableFlag = true; } else { pCurDq->bBaseLayerAvailableFlag = false; } } void PreprocessSliceCoding (sWelsEncCtx* pCtx) { SDqLayer* pCurLayer = pCtx->pCurDqLayer; const bool_t kbBaseAvail = pCurLayer->bBaseLayerAvailableFlag; /* function pointers conditional assignment under sWelsEncCtx, layer_mb_enc_rec (in stack) is exclusive */ if (P_SLICE == pCtx->eSliceType) { if (kbBaseAvail) { if (pCtx->pSvcParam->iNumDependencyLayer == (pCurLayer->sLayerInfo.sNalHeaderExt.uiDependencyId + 1)) { // pCtx->pFuncList->pfMotionSearch = WelsMotionEstimateSearchSad; pCtx->pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode; pCtx->pFuncList->pfIntraFineMd = WelsMdIntraFinePartitionVaa; pCtx->pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaa; pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad; pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad; pCtx->pFuncList->sSampleDealingFuncs.pfMdCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSad; } else { pCtx->pFuncList->pfMotionSearch = WelsMotionEstimateSearchSatd; pCtx->pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode; pCtx->pFuncList->pfIntraFineMd = WelsMdIntraFinePartition; pCtx->pFuncList->pfInterFineMd = WelsMdInterFinePartition; pCtx->pFuncList->sSampleDealingFuncs.pfMdCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd; } pCtx->pFuncList->sSampleDealingFuncs.pfMeCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd; } else { //case 3: pBase layer MD + encoding if (pCurLayer->sLayerInfo.sNalHeaderExt.uiDependencyId + 1 == pCtx->pSvcParam->iNumDependencyLayer) { pCtx->pFuncList->pfMotionSearch = WelsMotionEstimateSearchSad; pCtx->pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode; pCtx->pFuncList->pfIntraFineMd = WelsMdIntraFinePartitionVaa; pCtx->pFuncList->pfInterFineMd = WelsMdInterFinePartitionVaa; pCtx->pFuncList->sSampleDealingFuncs.pfMdCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSad; pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad; pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad; } else { pCtx->pFuncList->pfMotionSearch = WelsMotionEstimateSearchSatd; pCtx->pFuncList->pfFirstIntraMode = WelsMdFirstIntraMode; pCtx->pFuncList->pfIntraFineMd = WelsMdIntraFinePartition; pCtx->pFuncList->pfInterFineMd = WelsMdInterFinePartition; pCtx->pFuncList->sSampleDealingFuncs.pfMdCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd; } pCtx->pFuncList->sSampleDealingFuncs.pfMeCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd; } } else if (I_SLICE == pCtx->eSliceType) { if (pCurLayer->sLayerInfo.sNalHeaderExt.uiDependencyId + 1 == pCtx->pSvcParam->iNumDependencyLayer) { pCtx->pFuncList->sSampleDealingFuncs.pfMdCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSad; pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Sad; pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Sad; pCtx->pFuncList->pfIntraFineMd = WelsMdIntraFinePartitionVaa; } else { pCtx->pFuncList->sSampleDealingFuncs.pfMdCost = pCtx->pFuncList->sSampleDealingFuncs.pfSampleSatd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra16x16Combined3Satd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra8x8Combined3Satd; pCtx->pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3 = pCtx->pFuncList->sSampleDealingFuncs.pfIntra4x4Combined3Satd; pCtx->pFuncList->pfIntraFineMd = WelsMdIntraFinePartition; } } } /*! * \brief swap pDq layers between current pDq layer and reference pDq layer */ static inline void WelsSwapDqLayers (sWelsEncCtx* pCtx) { // swap and assign reference const int32_t kiDid = pCtx->uiDependencyId; const int32_t kiNextDqIdx = 1 + kiDid; SDqLayer* pTmpLayer = pCtx->ppDqLayerList[kiNextDqIdx]; SDqLayer* pRefLayer = pCtx->pCurDqLayer; pCtx->pCurDqLayer = pTmpLayer; pCtx->pCurDqLayer->pRefLayer = pRefLayer; } /*! * \brief prefetch reference picture after WelsBuildRefList */ static inline void PrefetchReferencePicture (sWelsEncCtx* pCtx, const EFrameType keFrameType) { SSlice* pSliceBase = &pCtx->pCurDqLayer->sLayerInfo.pSliceInLayer[0]; const int32_t kiSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer->pSliceEncCtx); int32_t iIdx = 0; uint8_t uiRefIdx = -1; assert (kiSliceCount > 0); if (keFrameType != WELS_FRAME_TYPE_IDR) { assert (pCtx->iNumRef0 > 0); pCtx->pRefPic = pCtx->pRefList0[0]; // always get item 0 due to reordering done pCtx->pCurDqLayer->pRefPic = pCtx->pRefPic; uiRefIdx = 0; // reordered reference iIndex } else { // safe for IDR coding pCtx->pRefPic = NULL; pCtx->pCurDqLayer->pRefPic = NULL; } iIdx = 0; while (iIdx < kiSliceCount) { pSliceBase->sSliceHeaderExt.sSliceHeader.uiRefIndex = uiRefIdx; ++ pSliceBase; ++ iIdx; } } void ParasetIdAdditionIdAdjust (SParaSetOffsetVariable* sParaSetOffsetVariable, const int32_t kiCurEncoderParaSetId, const uint32_t kuiMaxIdInBs) { //paraset_type = 0: SPS; =1: PPS //SPS_ID in avc_sps and pSubsetSps will be different using this //SPS_ID case example: //1st enter: next_spsid_in_bs == 0; spsid == 0; delta==0; //actual spsid_in_bs == 0 //1st finish: next_spsid_in_bs == 1; //2nd enter: next_spsid_in_bs == 1; spsid == 0; delta==1; //actual spsid_in_bs == 1 //2nd finish: next_spsid_in_bs == 2; //31st enter: next_spsid_in_bs == 31; spsid == 0~2; delta==31~29; //actual spsid_in_bs == 31 //31st finish: next_spsid_in_bs == 0; //31st enter: next_spsid_in_bs == 0; spsid == 0~2; delta==-2~0; //actual spsid_in_bs == 0 //31st finish: next_spsid_in_bs == 1; const int32_t kiEncId = kiCurEncoderParaSetId; const uint32_t kuiPrevIdInBs = sParaSetOffsetVariable->iParaSetIdDelta[kiEncId] + kiEncId;//mark current_id const bool_t* kpUsedIdPointer = &sParaSetOffsetVariable->bUsedParaSetIdInBs[0]; uint32_t uiNextIdInBs = sParaSetOffsetVariable->uiNextParaSetIdToUseInBs; #if _DEBUG if (0 != sParaSetOffsetVariable->iParaSetIdDelta[kiEncId]) assert (sParaSetOffsetVariable->bUsedParaSetIdInBs[kuiPrevIdInBs]); //sure the prev-used one was marked activated correctly #endif //update current layer's pCodingParam sParaSetOffsetVariable->iParaSetIdDelta[kiEncId] = uiNextIdInBs - kiEncId; //for current parameter set, change its id_delta //write pso pData for next update: sParaSetOffsetVariable->bUsedParaSetIdInBs[kuiPrevIdInBs] = false; // sParaSetOffsetVariable->bUsedParaSetIdInBs[uiNextIdInBs] = true; // update current used_id //prepare for next update: // find the next avaibable iId do { ++uiNextIdInBs; if (uiNextIdInBs >= kuiMaxIdInBs) { uiNextIdInBs = 0;//ensure the SPS_ID wound not exceed MAX_SPS_COUNT } } while (kpUsedIdPointer[uiNextIdInBs]); // update next_id sParaSetOffsetVariable->uiNextParaSetIdToUseInBs = uiNextIdInBs; #if _DEBUG assert (!sParaSetOffsetVariable->bUsedParaSetIdInBs[uiNextIdInBs]); //sure the next-to-use one is marked activated correctly #endif } /*! * \brief write all parameter sets introduced in SVC extension * \return size in bytes of bitstream wrote */ int32_t WelsWriteParameterSets (sWelsEncCtx* pCtx, int32_t* pNalLen, int32_t* pNumNal) { int32_t iSize = 0; int32_t iNal = 0; int32_t iIdx = 0; int32_t iId = 0; int32_t iCountNal = 0; if (NULL == pCtx || NULL == pNalLen || NULL == pNumNal) return 0; /* write all SPS */ iIdx = 0; while (iIdx < pCtx->iSpsNum) { SDqIdc* pDqIdc = &pCtx->pDqIdcMap[iIdx]; const int32_t kiDid = pDqIdc->uiSpatialId; const bool_t kbUsingSubsetSps = (kiDid > BASE_DEPENDENCY_ID); iNal = pCtx->pOut->iNalIndex; if (pCtx->pSvcParam->bEnableSpsPpsIdAddition) { #if _DEBUG pCtx->sPSOVector.bEnableSpsPpsIdAddition = 1; assert (kiDid < MAX_DEPENDENCY_LAYER); assert (iIdx < MAX_DQ_LAYER_NUM); #endif ParasetIdAdditionIdAdjust (& (pCtx->sPSOVector.sParaSetOffsetVariable[kbUsingSubsetSps ? PARA_SET_TYPE_SUBSETSPS : PARA_SET_TYPE_AVCSPS]), (kbUsingSubsetSps) ? (pCtx->pSubsetArray[iIdx - 1].pSps.uiSpsId) : (pCtx->pSpsArray[0].uiSpsId), MAX_SPS_COUNT); } else { memset (& (pCtx->sPSOVector), 0, sizeof (pCtx->sPSOVector)); } if (kbUsingSubsetSps) { iId = iIdx - 1; /* generate Subset SPS */ WelsLoadNal (pCtx->pOut, NAL_UNIT_SUBSET_SPS, NRI_PRI_HIGHEST); WelsWriteSubsetSpsSyntax (&pCtx->pSubsetArray[iId], &pCtx->pOut->sBsWrite, & (pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_SUBSETSPS].iParaSetIdDelta[0])); WelsUnloadNal (pCtx->pOut); } else { iId = 0; /* generate sequence parameters set */ WelsLoadNal (pCtx->pOut, NAL_UNIT_SPS, NRI_PRI_HIGHEST); WelsWriteSpsNal (&pCtx->pSpsArray[0], &pCtx->pOut->sBsWrite, & (pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_AVCSPS].iParaSetIdDelta[0])); WelsUnloadNal (pCtx->pOut); } pNalLen[iCountNal] = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], pCtx->pFrameBs + pCtx->iPosBsBuffer, &pNalLen[iCountNal]); pCtx->iPosBsBuffer += pNalLen[iCountNal]; iSize += pNalLen[iCountNal]; ++ iIdx; ++ iCountNal; } /* write all PPS */ iIdx = 0; while (iIdx < pCtx->iPpsNum) { if (pCtx->pSvcParam->bEnableSpsPpsIdAddition) { //para_set_type = 2: PPS, use MAX_PPS_COUNT ParasetIdAdditionIdAdjust (&pCtx->sPSOVector.sParaSetOffsetVariable[PARA_SET_TYPE_PPS], pCtx->pPPSArray[iIdx].iPpsId, MAX_PPS_COUNT); } iNal = pCtx->pOut->iNalIndex; /* generate picture parameter set */ WelsLoadNal (pCtx->pOut, NAL_UNIT_PPS, NRI_PRI_HIGHEST); WelsWritePpsSyntax (&pCtx->pPPSArray[iIdx], &pCtx->pOut->sBsWrite, & (pCtx->sPSOVector)); WelsUnloadNal (pCtx->pOut); pNalLen[iCountNal] = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], pCtx->pFrameBs + pCtx->iPosBsBuffer, &pNalLen[iCountNal]); pCtx->iPosBsBuffer += pNalLen[iCountNal]; iSize += pNalLen[iCountNal]; ++ iIdx; ++ iCountNal; } *pNumNal = iCountNal; return iSize; } static inline int32_t AddPrefixNal (sWelsEncCtx* pCtx, SLayerBSInfo* pLayerBsInfo, int32_t* pNalLen, int32_t* pNalIdxInLayer, const EWelsNalUnitType keNalType, const EWelsNalRefIdc keNalRefIdc) { int32_t iPayloadSize = 0; if (keNalRefIdc != NRI_PRI_LOWEST) { WelsLoadNal (pCtx->pOut, NAL_UNIT_PREFIX, keNalRefIdc); WelsWriteSVCPrefixNal (&pCtx->pOut->sBsWrite, keNalRefIdc, (NAL_UNIT_CODED_SLICE_IDR == keNalType)); WelsUnloadNal (pCtx->pOut); iPayloadSize = WelsEncodeNalExt (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, pCtx->pFrameBs + pCtx->iPosBsBuffer, &pNalLen[*pNalIdxInLayer]); pCtx->iPosBsBuffer += iPayloadSize; pLayerBsInfo->iNalLengthInByte[*pNalIdxInLayer] = iPayloadSize; (*pNalIdxInLayer) ++; } else { // No Prefix NAL Unit RBSP syntax here, but need add NAL Unit Header extension WelsLoadNal (pCtx->pOut, NAL_UNIT_PREFIX, keNalRefIdc); // No need write any syntax of prefix NAL Unit RBSP here WelsUnloadNal (pCtx->pOut); iPayloadSize = WelsEncodeNalExt (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, pCtx->pFrameBs + pCtx->iPosBsBuffer, &pNalLen[*pNalIdxInLayer]); pCtx->iPosBsBuffer += iPayloadSize; pLayerBsInfo->iNalLengthInByte[*pNalIdxInLayer] = iPayloadSize; (*pNalIdxInLayer) ++; } return iPayloadSize; } int32_t WritePadding (sWelsEncCtx* pCtx, int32_t iLen) { int32_t i = 0; int32_t iNal = 0; SBitStringAux* pBs = NULL; int32_t iNalLen; int32_t iSize = 0; iNal = pCtx->pOut->iNalIndex; pBs = &pCtx->pOut->sBsWrite; // SBitStringAux instance for non VCL NALs decoding if ((pBs->pBufEnd - pBs->pBufPtr) < iLen || iNal >= pCtx->pOut->iCountNals) { #if GOM_TRACE_FLAG WelsLog (pCtx, WELS_LOG_ERROR, "[RC] paddingcal pBuffer overflow, bufferlen=%lld, paddinglen=%d, iNalIdx= %d, iCountNals= %d\n", static_cast<long long int> (pBs->pBufEnd - pBs->pBufPtr), iLen, iNal, pCtx->pOut->iCountNals); #endif return 0; } WelsLoadNal (pCtx->pOut, NAL_UNIT_FILLER_DATA, NRI_PRI_LOWEST); for (i = 0; i < iLen; i++) { BsWriteBits (pBs, 8, 0xff); } BsRbspTrailingBits (pBs); BsFlush (pBs); WelsUnloadNal (pCtx->pOut); iNalLen = WelsEncodeNal (&pCtx->pOut->sNalList[iNal], pCtx->pFrameBs + pCtx->iPosBsBuffer, &iNalLen); pCtx->iPosBsBuffer += iNalLen; iSize += iNalLen; return iSize; } /* * post process of dynamic slicing bs writing in case PACKING_ONE_SLICE_PER_LAYER * include: count bs size of over all the slices in layer, * return: count number of slices in layer */ #if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER) int32_t PostProcDynamicSlicingBsWriting (sWelsEncCtx* pCtx, SLayerBSInfo* pLayerBsInfo, int32_t* pLayerSize, const int32_t kiPartitionCnt) { SDqLayer* pCurDq = pCtx->pCurDqLayer; int32_t iPartitionIdx = 0; int32_t iCheckingIdx = 0; int32_t iSwappingIdx = -1; int32_t iSliceCount = 0; int32_t iLayerSize = 0; // count number of slices in layer and layer size while (iPartitionIdx < kiPartitionCnt) { const int32_t coded_slice_cnt = pCurDq->pNumSliceCodedOfPartition[iPartitionIdx]; iLayerSize += pCtx->pSliceThreading->pCountBsSizeInPartition[iPartitionIdx]; iSliceCount += coded_slice_cnt; ++ iPartitionIdx; } *pLayerSize = iLayerSize; // reordering pLayerBs pointers, but do not ensure raster scan order of picture // just maintain discontinuous items,i.e, // input: // partition 1: uiSliceIdx: 0 2 4 6 // partition 2: uiSliceIdx: 1 3 5 7 9 11 13 // output: // uiSliceIdx: 0 1 2 3 4 5 6 7 8 9 10 iCheckingIdx = 0; while (true) { bool_t bMatchFlag = false; iPartitionIdx = 0; while (iPartitionIdx < kiPartitionCnt) { const int32_t coded_slice_cnt = pCurDq->pNumSliceCodedOfPartition[iPartitionIdx]; // iCheckingIdx need convert to iIndex of iPartitionIdx based to avoid linear searching // belong this partition and not exceed the number of slices coded in partition if (iPartitionIdx == (iCheckingIdx % kiPartitionCnt) && iCheckingIdx / kiPartitionCnt < coded_slice_cnt) { if (iSwappingIdx >= 0) { // memory swapping memmove (pLayerBsInfo + iSwappingIdx, LayerBsInfo + iCheckingIdx, sizeof (SLayerBSInfo)); // confirmed_safe_unsafe_usage ++ iSwappingIdx; // record iSwappingIdx } ++ iCheckingIdx; bMatchFlag = true; break; } ++ iPartitionIdx; } if (!bMatchFlag) { if (iSwappingIdx < 0) iSwappingIdx = iCheckingIdx; ++ iCheckingIdx; } if (iSwappingIdx >= iSliceCount) break; } return iSliceCount; } #endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER /* * Force coding IDR as follows */ int32_t ForceCodingIDR (sWelsEncCtx* pCtx) { if (NULL == pCtx) return 1; pCtx->bEncCurFrmAsIdrFlag = true; pCtx->iCodingIndex = 0; return 0; } int32_t WelsEncoderEncodeParameterSets (sWelsEncCtx* pCtx, void* pDst) { SFrameBSInfo* pFbi = (SFrameBSInfo*)pDst; SLayerBSInfo* pLayerBsInfo = &pFbi->sLayerInfo[0]; int32_t iNalLen[128] = {0}; int32_t iCountNal = 0; pLayerBsInfo->pBsBuf = pCtx->pFrameBs; InitBits (&pCtx->pOut->sBsWrite, pCtx->pOut->pBsBuffer, pCtx->pOut->uiSize); WelsWriteParameterSets (pCtx, &iNalLen[0], &iCountNal); pLayerBsInfo->uiPriorityId = 0; pLayerBsInfo->uiSpatialId = 0; pLayerBsInfo->uiTemporalId = 0; pLayerBsInfo->uiQualityId = 0; pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER; pLayerBsInfo->iNalCount = iCountNal; for (int32_t iNalIndex = 0; iNalIndex < iCountNal; ++ iNalIndex) { pLayerBsInfo->iNalLengthInByte[iNalIndex] = iNalLen[iNalIndex]; } pCtx->eLastNalPriority = NRI_PRI_HIGHEST; pFbi->iLayerNum = 1; #if defined(X86_ASM) WelsEmms(); #endif //X86_ASM return 0; } /*! * \brief core svc encoding process * * \pParam pCtx sWelsEncCtx*, encoder context * \pParam pDst FrameBSInfo* * \pParam pSrc SSourcePicture* for need_ds = true or SSourcePicture** for need_ds = false * \pParam iConfiguredLayerNum =1 in case need_ds = true or >1 in case need_ds = false * \pParam need_ds Indicate whether need down sampling desired * [NO in picture list case, YES in console aplication based] * \return EFrameType (WELS_FRAME_TYPE_IDR/WELS_FRAME_TYPE_I/WELS_FRAME_TYPE_P) */ int32_t WelsEncoderEncodeExt (sWelsEncCtx* pCtx, void* pDst, const SSourcePicture** ppSrcList, const int32_t iConfiguredLayerNum) { SFrameBSInfo* pFbi = (SFrameBSInfo*)pDst; SLayerBSInfo* pLayerBsInfo = &pFbi->sLayerInfo[0]; SWelsSvcCodingParam* pSvcParam = pCtx->pSvcParam; SSpatialPicIndex* pSpatialIndexMap = &pCtx->sSpatialIndexMap[0]; #if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC) SPicture* fsnr = NULL; #endif//ENABLE_FRAME_DUMP || ENABLE_PSNR_CALC SPicture* pEncPic = NULL; // to be decided later #if defined(MT_ENABLED) && (defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)) int32_t did_list[MAX_DEPENDENCY_LAYER] = {0}; #endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN int32_t iLayerNum = 0; int32_t iLayerSize = 0; int32_t iSpatialNum = 0; // available count number of spatial layers due to frame size changed in this given frame int32_t iSpatialIdx = 0; // iIndex of spatial layers due to frame size changed in this given frame int32_t iFrameSize = 0; int32_t iNalLen[128] = {0}; int32_t iNalIdxInLayer = 0; int32_t iCountNal = 0; EFrameType eFrameType = WELS_FRAME_TYPE_AUTO; int32_t iCurWidth = 0; int32_t iCurHeight = 0; EWelsNalUnitType eNalType = NAL_UNIT_UNSPEC_0; EWelsNalRefIdc eNalRefIdc = NRI_PRI_LOWEST; int8_t iCurDid = 0; int8_t iCurTid = 0; bool_t bAvcBased = false; #if defined(ENABLE_PSNR_CALC) real32_t snr_y = .0f, snr_u = .0f, snr_v = .0f; #endif//ENABLE_PSNR_CALC #if defined(_DEBUG) int32_t i = 0, j = 0, k = 0; #endif//_DEBUG pFbi->iLayerNum = 0; // for initialization // perform csc/denoise/downsample/padding, generate spatial layers iSpatialNum = pCtx->pVpp->WelsPreprocessStep1 (pCtx, ppSrcList, iConfiguredLayerNum); if (iSpatialNum < 1) { // skip due to temporal layer settings (different frame rate) ++ pCtx->iCodingIndex; return WELS_FRAME_TYPE_SKIP; } eFrameType = DecideFrameType (pCtx, iSpatialNum); if (eFrameType == WELS_FRAME_TYPE_SKIP) return eFrameType; InitFrameCoding (pCtx, eFrameType); iCurTid = GetTemporalLevel (&pSvcParam->sDependencyLayers[pSpatialIndexMap->iDid], pCtx->iCodingIndex, pSvcParam->uiGopSize); pCtx->uiTemporalId = iCurTid; pLayerBsInfo->pBsBuf = pCtx->pFrameBs ; if (eFrameType == WELS_FRAME_TYPE_IDR) { ++ pCtx->sPSOVector.uiIdrPicId; //if ( pSvcParam->bEnableSSEI ) // write parameter sets bitstream here WelsWriteParameterSets (pCtx, &iNalLen[0], &iCountNal); pLayerBsInfo->uiPriorityId = 0; pLayerBsInfo->uiSpatialId = 0; pLayerBsInfo->uiTemporalId = 0; pLayerBsInfo->uiQualityId = 0; pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER; pLayerBsInfo->iNalCount = iCountNal; for (int32_t iNalIndex = 0; iNalIndex < iCountNal; ++ iNalIndex) { pLayerBsInfo->iNalLengthInByte[iNalIndex] = iNalLen[iNalIndex]; } ++ pLayerBsInfo; pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer; ++ iLayerNum; } pCtx->pCurDqLayer = pCtx->ppDqLayerList[pSpatialIndexMap->iDid]; pCtx->pCurDqLayer->pRefLayer = NULL; while (iSpatialIdx < iSpatialNum) { const int32_t d_idx = (pSpatialIndexMap + iSpatialIdx)->iDid; // get iDid SDLayerParam* param_d = &pSvcParam->sDependencyLayers[d_idx]; pCtx->uiDependencyId = iCurDid = (int8_t)d_idx; pCtx->pVpp->WelsPreprocessStep3 (pCtx, d_idx); pCtx->pEncPic = pEncPic = (pSpatialIndexMap + iSpatialIdx)->pSrc; pCtx->pEncPic->iPictureType = pCtx->eSliceType; pCtx->pEncPic->iFramePoc = pCtx->iPOC; iCurWidth = param_d->iFrameWidth; iCurHeight = param_d->iFrameHeight; #if defined(MT_ENABLED) && (defined(DYNAMIC_SLICE_ASSIGN) || defined(MT_DEBUG)) did_list[iSpatialIdx] = iCurDid; #endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN // Encoding this picture might mulitiple sQualityStat layers potentially be encoded as followed switch (param_d->sMso.uiSliceMode) { case SM_FIXEDSLCNUM_SLICE: { #if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN) if ((iCurDid > 0) && (pSvcParam->iMultipleThreadIdc > 1) && (pSvcParam->sDependencyLayers[iCurDid].sMso.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pSvcParam->iMultipleThreadIdc >= pSvcParam->sDependencyLayers[iCurDid].sMso.sSliceArgument.iSliceNum) ) AdjustEnhanceLayer (pCtx, iCurDid); #endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN break; } case SM_DYN_SLICE: { int32_t iPicIPartitionNum = PicPartitionNumDecision (pCtx); // MT compatibility pCtx->iActiveThreadsNum = iPicIPartitionNum; // we try to active number of threads, equal to number of picture partitions WelsInitCurrentDlayerMltslc (pCtx, iPicIPartitionNum); break; } default: { break; } } /* coding each spatial layer, only one sQualityStat layer within spatial support */ int32_t iSliceCount = 1; if (iLayerNum >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info writing as follows WelsLog (pCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) overflow(max:%d)!", iLayerNum, MAX_LAYER_NUM_OF_FRAME); return -1; } iNalIdxInLayer = 0; bAvcBased = (iCurDid == BASE_DEPENDENCY_ID); pCtx->bNeedPrefixNalFlag = (bAvcBased && (pSvcParam->bPrefixNalAddingCtrl || (pSvcParam->iNumDependencyLayer > 1))); if (eFrameType == WELS_FRAME_TYPE_P) { eNalType = bAvcBased ? NAL_UNIT_CODED_SLICE : NAL_UNIT_CODED_SLICE_EXT; } else if (eFrameType == WELS_FRAME_TYPE_IDR) { eNalType = bAvcBased ? NAL_UNIT_CODED_SLICE_IDR : NAL_UNIT_CODED_SLICE_EXT; } if (iCurTid == 0 || pCtx->eSliceType == I_SLICE) eNalRefIdc = NRI_PRI_HIGHEST; else if (iCurTid == pSvcParam->iDecompStages) eNalRefIdc = NRI_PRI_LOWEST; else if (1 + iCurTid == pSvcParam->iDecompStages) eNalRefIdc = NRI_PRI_LOW; else // more details for other temporal layers? eNalRefIdc = NRI_PRI_HIGHEST; pCtx->eNalType = eNalType; pCtx->eNalPriority = eNalRefIdc; pCtx->pDecPic = pCtx->ppRefPicListExt[iCurDid]->pNextBuffer; #if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC) fsnr = pCtx->pDecPic; #endif//#if defined(ENABLE_FRAME_DUMP) || defined(ENABLE_PSNR_CALC) pCtx->pDecPic->iPictureType = pCtx->eSliceType; pCtx->pDecPic->iFramePoc = pCtx->iPOC; WelsInitCurrentLayer (pCtx, iCurWidth, iCurHeight); WelsMarkPic (pCtx); if (!WelsBuildRefList (pCtx, pCtx->iPOC)) { // Force coding IDR as followed ForceCodingIDR (pCtx); WelsLog (pCtx, WELS_LOG_WARNING, "WelsEncoderEncodeExt(), WelsBuildRefList failed for P frames, pCtx->iNumRef0= %d.\n", pCtx->iNumRef0); return -1; } #ifdef LONG_TERM_REF_DUMP dump_ref (pCtx); #endif WelsUpdateRefSyntax (pCtx, pCtx->iPOC, eFrameType); //get reordering syntax used for writing slice header and transmit to encoder. PrefetchReferencePicture (pCtx, eFrameType); // update reference picture for current pDq layer pCtx->pFuncList->pfRc.pfWelsRcPictureInit (pCtx); PreprocessSliceCoding (pCtx); // MUST be called after pfWelsRcPictureInit() and WelsInitCurrentLayer() iLayerSize = 0; if (SM_SINGLE_SLICE == param_d->sMso.uiSliceMode) { // only one slice within a sQualityStat layer int32_t iSliceSize = 0; if (pCtx->bNeedPrefixNalFlag) { iLayerSize += AddPrefixNal (pCtx, pLayerBsInfo, &iNalLen[0], &iNalIdxInLayer, eNalType, eNalRefIdc); } WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc); WelsCodeOneSlice (pCtx, 0, eNalType); WelsUnloadNal (pCtx->pOut); iSliceSize = WelsEncodeNalExt (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, pCtx->pFrameBs + pCtx->iPosBsBuffer, &iNalLen[iNalIdxInLayer]); iLayerSize += iSliceSize; pCtx->iPosBsBuffer += iSliceSize; pLayerBsInfo->uiLayerType = VIDEO_CODING_LAYER; pLayerBsInfo->uiSpatialId = iCurDid; pLayerBsInfo->uiTemporalId = iCurTid; pLayerBsInfo->uiQualityId = 0; pLayerBsInfo->uiPriorityId = 0; pLayerBsInfo->iNalLengthInByte[iNalIdxInLayer] = iSliceSize; pLayerBsInfo->iNalCount = ++ iNalIdxInLayer; } // for dynamic slicing single threading.. #ifndef MT_ENABLED else if (SM_DYN_SLICE == param_d->sMso.uiSliceMode) #else // MT_ENABLED else if ((SM_DYN_SLICE == param_d->sMso.uiSliceMode) && (pSvcParam->iMultipleThreadIdc <= 1)) #endif//MT_ENABLED { const int32_t kiLastMbInFrame = pCtx->pCurDqLayer->pSliceEncCtx->iMbNumInFrame; WelsCodeOnePicPartition (pCtx, pLayerBsInfo, &iNalIdxInLayer, &iLayerSize, 0, kiLastMbInFrame, 0); } else { //other multi-slice uiSliceMode #if defined(MT_ENABLED) int err = 0; // THREAD_FULLY_FIRE_MODE/THREAD_PICK_UP_MODE for any mode of non-SM_DYN_SLICE if ((SM_DYN_SLICE != param_d->sMso.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) { iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer->pSliceEncCtx); if (iLayerNum + #if defined(PACKING_ONE_SLICE_PER_LAYER) iSliceCount #else 1 #endif//PACKING_ONE_SLICE_PER_LAYER >= MAX_LAYER_NUM_OF_FRAME) { // check available layer_bs_info for further writing as followed WelsLog (pCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iLayerNum(%d) overflow(max:%d) at iDid= %d uiSliceMode= %d, iSliceCount= %d!", iLayerNum, MAX_LAYER_NUM_OF_FRAME, iCurDid, param_d->sMso.uiSliceMode, iSliceCount); return -1; } if (iSliceCount <= 1) { WelsLog (pCtx, WELS_LOG_ERROR, "WelsEncoderEncodeExt(), iSliceCount(%d) from GetCurrentSliceNum() is untrusted due stack/heap crupted!\n", iSliceCount); return -1; } if (pSvcParam->iCountThreadsNum >= iSliceCount) { //THREAD_FULLY_FIRE_MODE #if defined(PACKING_ONE_SLICE_PER_LAYER) int32_t iSliceIdx = 1; int32_t iOrgSlicePos[MAX_SLICES_NUM] = {0}; iOrgSlicePos[0] = pCtx->iPosBsBuffer; while (uiSliceIdx < iSliceCount) { iOrgSlicePos[uiSliceIdx] = pCtx->pSliceBs[uiSliceIdx].uiBsPos; ++ uiSliceIdx; } #elif defined(MT_DEBUG) int64_t t_bs_append = 0; #endif//PACKING_ONE_SLICE_PER_LAYER pCtx->iActiveThreadsNum = iSliceCount; // to fire slice coding threads err = FiredSliceThreads (&pCtx->pSliceThreading->pThreadPEncCtx[0], &pCtx->pSliceThreading->pReadySliceCodingEvent[0], pLayerBsInfo, iSliceCount, pCtx->pCurDqLayer->pSliceEncCtx, FALSE); if (err) { WelsLog (pCtx, WELS_LOG_ERROR, "[MT] WelsEncoderEncodeExt(), FiredSliceThreads return(%d) failed and exit encoding frame, iCountThreadsNum= %d, iSliceCount= %d, uiSliceMode= %d, iMultipleThreadIdc= %d!!\n", err, pSvcParam->iCountThreadsNum, iSliceCount, param_d->sMso.uiSliceMode, pSvcParam->iMultipleThreadIdc); return -1; } WelsMultipleEventsWaitAllBlocking (iSliceCount, &pCtx->pSliceThreading->pSliceCodedEvent[0]); // all slices are finished coding here // append exclusive slice 0 bs to pFrameBs #if defined(PACKING_ONE_SLICE_PER_LAYER) iLayerSize = pCtx->iPosBsBuffer - iOrgSlicePos[0]; uiSliceIdx = 1; while (uiSliceIdx < iSliceCount) { iLayerSize += pCtx->pSliceBs[uiSliceIdx].uiBsPos - iOrgSlicePos[uiSliceIdx]; ++ uiSliceIdx; } iLayerNum += iSliceCount; // each slice stickly output as layer info for performance improvement directly pLayerBsInfo += iSliceCount; #else #if defined(MT_DEBUG) t_bs_append = WelsTime(); #endif//MT_DEBUG iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount); #if defined(MT_DEBUG) t_bs_append = WelsTime() - t_bs_append; if (pCtx->pSliceThreading->pFSliceDiff) { fprintf (pCtx->pSliceThreading->pFSliceDiff, #if defined(_WIN32) "%6I64d us consumed at AppendSliceToFrameBs() for coding_idx: %d iDid: %d qid: %d\n", #else "%6lld us consumed at AppendSliceToFrameBs() for coding_idx: %d iDid: %d qid: %d\n", #endif//WIN32 t_bs_append, pCtx->iCodingIndex, iCurDid, 0); } #endif//MT_DEBUG #endif//PACKING_ONE_SLICE_PER_LAYER } else { //THREAD_PICK_UP_MODE int32_t iNumThreadsRunning = 0; int32_t iNumThreadsScheduled = 0; int32_t iIndexOfSliceToBeCoded = 0; #if defined(PACKING_ONE_SLICE_PER_LAYER) int32_t iSliceIdx = 1; int32_t iOrgSlicePos[MAX_SLICES_NUM] = {0}; iOrgSlicePos[0] = pCtx->iPosBsBuffer; while (uiSliceIdx < iSliceCount) { iOrgSlicePos[uiSliceIdx] = pCtx->pSliceBs[uiSliceIdx].uiBsPos; ++ uiSliceIdx; } #endif//PACKING_ONE_SLICE_PER_LAYER pCtx->iActiveThreadsNum = pSvcParam->iCountThreadsNum; iNumThreadsScheduled = pCtx->iActiveThreadsNum; iNumThreadsRunning = iNumThreadsScheduled; // to fire slice coding threads err = FiredSliceThreads (&pCtx->pSliceThreading->pThreadPEncCtx[0], &pCtx->pSliceThreading->pReadySliceCodingEvent[0], pLayerBsInfo, iNumThreadsRunning, pCtx->pCurDqLayer->pSliceEncCtx, FALSE); if (err) { WelsLog (pCtx, WELS_LOG_ERROR, "[MT] WelsEncoderEncodeExt(), FiredSliceThreads return(%d) failed and exit encoding frame, iCountThreadsNum= %d, iSliceCount= %d, uiSliceMode= %d, iMultipleThreadIdc= %d!!\n", err, pSvcParam->iCountThreadsNum, iSliceCount, param_d->sMso.uiSliceMode, pSvcParam->iMultipleThreadIdc); return -1; } iIndexOfSliceToBeCoded = iNumThreadsRunning; while (1) { if (iIndexOfSliceToBeCoded >= iSliceCount && iNumThreadsRunning <= 0) break; #ifdef _WIN32 WELS_THREAD_ERROR_CODE lwait = 0; int32_t iEventId = -1; lwait = WelsMultipleEventsWaitSingleBlocking (iNumThreadsScheduled, &pCtx->pSliceThreading->pSliceCodedEvent[0], 2); // 2 ms for one tick iEventId = (int32_t) (lwait - WELS_THREAD_ERROR_WAIT_OBJECT_0); if (iEventId >= 0 && iEventId < iNumThreadsScheduled) { if (iIndexOfSliceToBeCoded < iSliceCount) { // pick up succeeding slice for threading // thread_id equal to iEventId per implementation here pCtx->pSliceThreading->pThreadPEncCtx[iEventId].iSliceIndex = iIndexOfSliceToBeCoded; #ifdef PACKING_ONE_SLICE_PER_LAYER pCtx->pSliceThreading->pThreadPEncCtx[iEventId].pLayerBs = pLayerBsInfo + iIndexOfSliceToBeCoded; #endif//PACKING_ONE_SLICE_PER_LAYER WelsEventSignal (&pCtx->pSliceThreading->pReadySliceCodingEvent[iEventId]); ++ iIndexOfSliceToBeCoded; } else { // no other slices left for coding -- iNumThreadsRunning; } } else { WelsSleep (1); } #else//__GNUC__ // TODO for pthread platforms // alternate implementation using blocking due non-blocking with timeout mode not support at wels thread lib, tune back if available WelsMultipleEventsWaitAllBlocking (iNumThreadsRunning, &pCtx->pSliceThreading->pSliceCodedEvent[0]); if (iIndexOfSliceToBeCoded < iSliceCount) { int32_t iThreadIdx = 0; // pick up succeeding slices for threading if left while (iThreadIdx < iNumThreadsScheduled) { if (iIndexOfSliceToBeCoded >= iSliceCount) break; pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].iSliceIndex = iIndexOfSliceToBeCoded; #ifdef PACKING_ONE_SLICE_PER_LAYER pCtx->pSliceThreading->pThreadPEncCtx[iThreadIdx].pLayerBs = pLayerBsInfo + iIndexOfSliceToBeCoded; #endif//PACKING_ONE_SLICE_PER_LAYER WelsEventSignal (pCtx->pSliceThreading->pReadySliceCodingEvent[iThreadIdx]); ++ iIndexOfSliceToBeCoded; ++ iThreadIdx; } // update iNumThreadsRunning iNumThreadsRunning = iThreadIdx; } else { iNumThreadsRunning = 0; } #endif//_WIN32 }//while(1) // all slices are finished coding here // append exclusive slice 0 bs to pFrameBs #if defined(PACKING_ONE_SLICE_PER_LAYER) iLayerSize = pCtx->iPosBsBuffer - iOrgSlicePos[0]; uiSliceIdx = 1; while (uiSliceIdx < iSliceCount) { iLayerSize += pCtx->pSliceBs[uiSliceIdx].uiBsPos - iOrgSlicePos[uiSliceIdx]; ++ uiSliceIdx; } iLayerNum += iSliceCount; // each slice stickly output as layer info for performance improvement directly pLayerBsInfo += iSliceCount; #else iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, iSliceCount); #endif//PACKING_ONE_SLICE_PER_LAYER } } // THREAD_FULLY_FIRE_MODE && SM_DYN_SLICE else if ((SM_DYN_SLICE == param_d->sMso.uiSliceMode) && (pSvcParam->iMultipleThreadIdc > 1)) { const int32_t kiPartitionCnt = pCtx->iActiveThreadsNum; //pSvcParam->iCountThreadsNum; #if defined(PACKING_ONE_SLICE_PER_LAYER) ResetCountBsSizeInPartitions (pCtx->pSliceThreading->pCountBsSizeInPartition, kiPartitionCnt); pCtx->pCurDqLayer->pSliceEncCtx->iMaxSliceNumConstraint = WELS_MIN (MAX_SLICES_NUM, DynamicMaxSliceNumConstraint (MAX_LAYER_NUM_OF_FRAME, iLayerNum, 1 + /*( num_qlayer - 1) +*/ (((iCurDid == 0) && (pSvcParam->uiGopSize > 1)) ? 1 : 0))); #endif//PACKING_ONE_SLICE_PER_LAYER // to fire slice coding threads err = FiredSliceThreads (&pCtx->pSliceThreading->pThreadPEncCtx[0], &pCtx->pSliceThreading->pReadySliceCodingEvent[0], pLayerBsInfo, kiPartitionCnt, pCtx->pCurDqLayer->pSliceEncCtx, TRUE); if (err) { WelsLog (pCtx, WELS_LOG_ERROR, "[MT] WelsEncoderEncodeExt(), FiredSliceThreads return(%d) failed and exit encoding frame, iCountThreadsNum= %d, iSliceCount= %d, uiSliceMode= %d, iMultipleThreadIdc= %d!!\n", err, pSvcParam->iCountThreadsNum, iSliceCount, param_d->sMso.uiSliceMode, pSvcParam->iMultipleThreadIdc); return -1; } WelsMultipleEventsWaitAllBlocking (kiPartitionCnt, &pCtx->pSliceThreading->pSliceCodedEvent[0]); #if defined(PACKING_ONE_SLICE_PER_LAYER) iSliceCount = PostProcDynamicSlicingBsWriting (pCtx, pLayerBsInfo, &iLayerSize, kiPartitionCnt); assert (iLayerNum + iSliceCount < MAX_LAYER_NUM_OF_FRAME); pLayerBsInfo += iSliceCount; iLayerNum += iSliceCount; #else iLayerSize = AppendSliceToFrameBs (pCtx, pLayerBsInfo, kiPartitionCnt); #endif//PACKING_ONE_SLICE_PER_LAYER } else // for non-dynamic-slicing mode single threading branch.. #endif//MT_ENABLED { const bool_t bNeedPrefix = pCtx->bNeedPrefixNalFlag; int32_t iSliceIdx = 0; iSliceCount = GetCurrentSliceNum (pCtx->pCurDqLayer->pSliceEncCtx); while (iSliceIdx < iSliceCount) { int32_t iSliceSize = 0; if (bNeedPrefix) { iLayerSize += AddPrefixNal (pCtx, pLayerBsInfo, &iNalLen[0], &iNalIdxInLayer, eNalType, eNalRefIdc); } WelsLoadNal (pCtx->pOut, eNalType, eNalRefIdc); WelsCodeOneSlice (pCtx, iSliceIdx, eNalType); WelsUnloadNal (pCtx->pOut); iSliceSize = WelsEncodeNalExt (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, pCtx->pFrameBs + pCtx->iPosBsBuffer, &iNalLen[iNalIdxInLayer]); pCtx->iPosBsBuffer += iSliceSize; iLayerSize += iSliceSize; pLayerBsInfo->iNalLengthInByte[iNalIdxInLayer] = iSliceSize; #if defined(SLICE_INFO_OUTPUT) fprintf (stderr, "@slice=%-6d sliceType:%c idc:%d size:%-6d\n", iSliceIdx, (pCtx->eSliceType == P_SLICE ? 'P' : 'I'), eNalRefIdc, iSliceSize); #endif//SLICE_INFO_OUTPUT ++ iNalIdxInLayer; ++ iSliceIdx; } pLayerBsInfo->uiLayerType = VIDEO_CODING_LAYER; pLayerBsInfo->uiSpatialId = iCurDid; pLayerBsInfo->uiTemporalId = iCurTid; pLayerBsInfo->uiQualityId = 0; pLayerBsInfo->uiPriorityId = 0; pLayerBsInfo->iNalCount = iNalIdxInLayer; } } // deblocking filter if ( #if defined(MT_ENABLED) (!pCtx->pCurDqLayer->bDeblockingParallelFlag) && #endif//MT_ENABLED #if !defined(ENABLE_FRAME_DUMP) ((eNalRefIdc != NRI_PRI_LOWEST) && (param_d->iHighestTemporalId == 0 || iCurTid < param_d->iHighestTemporalId)) && #endif//!ENABLE_FRAME_DUMP true ) { PerformDeblockingFilter (pCtx); } // reference picture list update if (eNalRefIdc != NRI_PRI_LOWEST) { if (!WelsUpdateRefList (pCtx)) { // Force coding IDR as followed ForceCodingIDR (pCtx); WelsLog (pCtx, WELS_LOG_WARNING, "WelsEncoderEncodeExt(), WelsUpdateRefList failed.\n"); return -1; } } iFrameSize += iLayerSize; pCtx->pFuncList->pfRc.pfWelsRcPictureInfoUpdate (pCtx, iLayerSize); #ifdef ENABLE_FRAME_DUMP // Dump reconstruction picture for each sQualityStat layer if (iCurDid + 1 < pSvcParam->iNumDependencyLayer) DumpDependencyRec (fsnr, ¶m_d->sRecFileName[0], iCurDid); #endif//ENABLE_FRAME_DUMP #if defined(ENABLE_PSNR_CALC) snr_y = WelsCalcPsnr (fsnr->pData[0], fsnr->iLineSize[0], pEncPic->pData[0], pEncPic->iLineSize[0], iCurWidth, iCurHeight); snr_u = WelsCalcPsnr (fsnr->pData[1], fsnr->iLineSize[1], pEncPic->pData[1], pEncPic->iLineSize[1], (iCurWidth >> 1), (iCurHeight >> 1)); snr_v = WelsCalcPsnr (fsnr->pData[2], fsnr->iLineSize[2], pEncPic->pData[2], pEncPic->iLineSize[2], (iCurWidth >> 1), (iCurHeight >> 1)); #endif//ENABLE_PSNR_CALC #if defined(LAYER_INFO_OUTPUT) fprintf (stderr, "%2s %5d: %-5d %2s T%1d D%1d Q%-2d QP%3d Y%2.2f U%2.2f V%2.2f %8d bits\n", (iSpatialIdx == 0) ? "#AU" : " ", pCtx->iPOC, pCtx->iFrameNum, (uiFrameType == WELS_FRAME_TYPE_I || uiFrameType == WELS_FRAME_TYPE_IDR) ? "I" : "P", iCurTid, iCurDid, 0, pCtx->pWelsSvcRc[pCtx->uiDependencyId].iAverageFrameQp, snr_y, snr_u, snr_v, (iLayerSize << 3)); #endif//LAYER_INFO_OUTPUT #if defined(STAT_OUTPUT) #if defined(ENABLE_PSNR_CALC) { pCtx->sStatData[iCurDid][0].sQualityStat.rYPsnr[pCtx->eSliceType] += snr_y; pCtx->sStatData[iCurDid][0].sQualityStat.rUPsnr[pCtx->eSliceType] += snr_u; pCtx->sStatData[iCurDid][0].sQualityStat.rVPsnr[pCtx->eSliceType] += snr_v; } #endif//ENABLE_PSNR_CALC #if defined(MB_TYPES_CHECK) //091025, frame output if (pCtx->eSliceType == P_SLICE) { pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Intra4x4] += pCtx->sPerInfo.iMbCount[P_SLICE][Intra4x4]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Intra16x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Intra16x16]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter16x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter16x16]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter16x8] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter16x8]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter8x16] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter8x16]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][Inter8x8] += pCtx->sPerInfo.iMbCount[P_SLICE][Inter8x8]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][PSkip] += pCtx->sPerInfo.iMbCount[P_SLICE][PSkip]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][8] += pCtx->sPerInfo.iMbCount[P_SLICE][8]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][9] += pCtx->sPerInfo.iMbCount[P_SLICE][9]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][10] += pCtx->sPerInfo.iMbCount[P_SLICE][10]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[P_SLICE][11] += pCtx->sPerInfo.iMbCount[P_SLICE][11]; } else if (pCtx->eSliceType == I_SLICE) { pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][Intra4x4] += pCtx->sPerInfo.iMbCount[I_SLICE][Intra4x4]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][Intra16x16] += pCtx->sPerInfo.iMbCount[I_SLICE][Intra16x16]; pCtx->sStatData[iCurDid][0].sSliceData.iMbCount[I_SLICE][7] += pCtx->sPerInfo.iMbCount[I_SLICE][7]; } memset (pCtx->sPerInfo.iMbCount[P_SLICE], 0, 18 * sizeof (int32_t)); memset (pCtx->sPerInfo.iMbCount[I_SLICE], 0, 18 * sizeof (int32_t)); #endif//MB_TYPES_CHECK { //no pCtx->pSvcParam->bMgsT0OnlyStrategy ++ pCtx->sStatData[iCurDid][0].sSliceData.iSliceCount[pCtx->eSliceType]; // for multiple slices coding pCtx->sStatData[iCurDid][0].sSliceData.iSliceSize[pCtx->eSliceType] += (iLayerSize << 3); // bits } #endif//STAT_OUTPUT #if defined(MT_ENABLED) && defined(PACKING_ONE_SLICE_PER_LAYER) if (pSvcParam->iMultipleThreadIdc <= 1 || SM_SINGLE_SLICE == param_d->sMso.uiSliceMode) // sigle thread actually used #else if (1) #endif//MT_ENABLED && PACKING_ONE_SLICE_PER_LAYER { ++ iLayerNum; ++ pLayerBsInfo; } pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer; if (pSvcParam->iPaddingFlag && pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize > 0) { const int32_t kiPaddingNalSize = WritePadding (pCtx, pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize); #if GOM_TRACE_FLAG WelsLog (pCtx, WELS_LOG_INFO, "[RC] encoding_qp%d Padding: %d\n", pCtx->uiDependencyId, pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize); #endif if (kiPaddingNalSize <= 0) return -1; pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingBitrateStat += pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize; pCtx->pWelsSvcRc[pCtx->uiDependencyId].iPaddingSize = 0; pLayerBsInfo->uiPriorityId = 0; pLayerBsInfo->uiSpatialId = 0; pLayerBsInfo->uiTemporalId = 0; pLayerBsInfo->uiQualityId = 0; pLayerBsInfo->uiLayerType = NON_VIDEO_CODING_LAYER; pLayerBsInfo->iNalCount = 1; pLayerBsInfo->iNalLengthInByte[0] = kiPaddingNalSize; ++ pLayerBsInfo; pLayerBsInfo->pBsBuf = pCtx->pFrameBs + pCtx->iPosBsBuffer; ++ iLayerNum; } #if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN) && defined(TRY_SLICING_BALANCE) if (param_d->sMso.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pSvcParam->iMultipleThreadIdc > 1 && pSvcParam->iMultipleThreadIdc >= param_d->sMso.sSliceArgument.iSliceNum) { CalcSliceComplexRatio (pCtx->pSliceThreading->pSliceComplexRatio[iCurDid], pCtx->pCurDqLayer->pSliceEncCtx, pCtx->pSliceThreading->pSliceConsumeTime[iCurDid]); #if defined(MT_DEBUG) TrackSliceComplexities (pCtx, iCurDid); #endif//#if defined(MT_DEBUG) } #endif//MT_ENABLED && DYNAMIC_SLICE_ASSIGN && TRY_SLICING_BALANCE ++ iSpatialIdx; if (iCurDid + 1 < pSvcParam->iNumDependencyLayer) { WelsSwapDqLayers (pCtx); } if (pSvcParam->bEnableLongTermReference && (pCtx->pLtr[pCtx->uiDependencyId].bLTRMarkingFlag && (pCtx->pLtr[pCtx->uiDependencyId].iLTRMarkMode == LTR_DELAY_MARK))) { pCtx->bLongTermRefFlag[d_idx][0] = true; } if (iCurTid < pCtx->uiSpatialLayersInTemporal[d_idx] - 1 || pSvcParam->iDecompStages == 0) { if ((iCurTid >= MAX_TEMPORAL_LEVEL) || (pCtx->uiSpatialLayersInTemporal[d_idx] - 1 >= MAX_TEMPORAL_LEVEL)) { ForceCodingIDR (pCtx); // some logic error return -1; } if (pSvcParam->bEnableLongTermReference && pCtx->bLongTermRefFlag[d_idx][iCurTid]) { SPicture* tmp = pCtx->pSpatialPic[d_idx][pCtx->uiSpatialLayersInTemporal[d_idx] + pCtx->pVaa->uiMarkLongTermPicIdx]; pCtx->pSpatialPic[d_idx][pCtx->uiSpatialLayersInTemporal[d_idx] + pCtx->pVaa->uiMarkLongTermPicIdx] = pCtx->pSpatialPic[d_idx][iCurTid]; pCtx->pSpatialPic[d_idx][iCurTid] = pCtx->pSpatialPic[d_idx][pCtx->uiSpatialLayersInTemporal[d_idx] - 1]; pCtx->pSpatialPic[d_idx][pCtx->uiSpatialLayersInTemporal[d_idx] - 1] = tmp; pCtx->bLongTermRefFlag[d_idx][iCurTid] = false; } else { WelsExchangeSpatialPictures (&pCtx->pSpatialPic[d_idx][pCtx->uiSpatialLayersInTemporal[d_idx] - 1], &pCtx->pSpatialPic[d_idx][iCurTid]); } } if (pSvcParam->bEnableLongTermReference && ((pCtx->pLtr[pCtx->uiDependencyId].bLTRMarkingFlag && (pCtx->pLtr[pCtx->uiDependencyId].iLTRMarkMode == LTR_DIRECT_MARK)) || eFrameType == WELS_FRAME_TYPE_IDR)) { pCtx->bLongTermRefFlag[d_idx][iCurTid] = true; } } #if defined(MT_ENABLED) && defined(MT_DEBUG) TrackSliceConsumeTime (pCtx, did_list, iSpatialNum); #endif//MT_ENABLED && MT_DEBUG #if defined(MT_ENABLED) && defined(DYNAMIC_SLICE_ASSIGN) if (pSvcParam->iMultipleThreadIdc > 1 && did_list[0] == BASE_DEPENDENCY_ID && pSvcParam->sDependencyLayers[0].sMso.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pSvcParam->iMultipleThreadIdc >= pSvcParam->sDependencyLayers[0].sMso.sSliceArgument.iSliceNum && pSvcParam->sDependencyLayers[did_list[iSpatialNum - 1]].sMso.uiSliceMode == SM_FIXEDSLCNUM_SLICE && pSvcParam->iMultipleThreadIdc >= pSvcParam->sDependencyLayers[did_list[iSpatialNum - 1]].sMso.sSliceArgument.iSliceNum) { AdjustBaseLayer (pCtx); } #endif//DYNAMIC_SLICE_ASSIGN #ifdef ENABLE_FRAME_DUMP DumpRecFrame (fsnr, &pSvcParam->sDependencyLayers[pSvcParam->iNumDependencyLayer - 1].sRecFileName[0]); // pDecPic: final reconstruction output #endif//ENABLE_FRAME_DUMP ++ pCtx->iCodingIndex; pCtx->eLastNalPriority = eNalRefIdc; pFbi->iLayerNum = iLayerNum; #if defined(X86_ASM) WelsEmms(); #endif //X86_ASM return eFrameType; } /*! * \brief Wels SVC encoder parameters adjustment * SVC adjustment results in new requirement in memory blocks adjustment */ int32_t WelsEncoderParamAdjust (sWelsEncCtx** ppCtx, SWelsSvcCodingParam* pNewParam) { SWelsSvcCodingParam* pOldParam = NULL; int32_t iReturn = 0; int8_t iIndexD = 0; bool_t bNeedReset = false; if (NULL == ppCtx || NULL == *ppCtx || NULL == pNewParam) return 1; /* Check validation in new parameters */ iReturn = ParamValidationExt (pNewParam); if (iReturn != 0) return iReturn; pOldParam = (*ppCtx)->pSvcParam; /* Decide whether need reset for IDR frame based on adjusting prarameters changed */ /* Temporal levels, spatial settings and/ or quality settings changed need update parameter sets related. */ bNeedReset = (pOldParam == NULL) || (pOldParam->iNumTemporalLayer != pNewParam->iNumTemporalLayer) || (pOldParam->uiGopSize != pNewParam->uiGopSize) || (pOldParam->iNumDependencyLayer != pNewParam->iNumDependencyLayer) || (pOldParam->iDecompStages != pNewParam->iDecompStages) || (pOldParam->iActualPicWidth != pNewParam->iActualPicWidth || pOldParam->iActualPicHeight != pNewParam->iActualPicHeight) || (pOldParam->SUsedPicRect.iWidth != pNewParam->SUsedPicRect.iWidth || pOldParam->SUsedPicRect.iHeight != pNewParam->SUsedPicRect.iHeight) || (pOldParam->bEnableLongTermReference != pNewParam->bEnableLongTermReference); if (!bNeedReset) { // Check its picture resolutions/quality settings respectively in each dependency layer iIndexD = 0; assert (pOldParam->iNumDependencyLayer == pNewParam->iNumDependencyLayer); do { const SDLayerParam* kpOldDlp = &pOldParam->sDependencyLayers[iIndexD]; const SDLayerParam* kpNewDlp = &pNewParam->sDependencyLayers[iIndexD]; float fT1 = .0f; float fT2 = .0f; // check frame size settings if (kpOldDlp->iFrameWidth != kpNewDlp->iFrameWidth || kpOldDlp->iFrameHeight != kpNewDlp->iFrameHeight || kpOldDlp->iActualWidth != kpNewDlp->iActualWidth || kpOldDlp->iActualHeight != kpNewDlp->iActualHeight) { bNeedReset = true; break; } if (kpOldDlp->sMso.uiSliceMode != kpNewDlp->sMso.uiSliceMode || kpOldDlp->sMso.sSliceArgument.iSliceNum != kpNewDlp->sMso.sSliceArgument.iSliceNum) { bNeedReset = true; break; } // check frame rate // we can not check whether corresponding fFrameRate is equal or not, // only need to check d_max/d_min and max_fr/d_max whether it is equal or not if (kpNewDlp->fInputFrameRate > EPSN && kpOldDlp->fInputFrameRate > EPSN) fT1 = kpNewDlp->fOutputFrameRate / kpNewDlp->fInputFrameRate - kpOldDlp->fOutputFrameRate / kpOldDlp->fInputFrameRate; if (kpNewDlp->fOutputFrameRate > EPSN && kpOldDlp->fOutputFrameRate > EPSN) fT2 = pNewParam->fMaxFrameRate / kpNewDlp->fOutputFrameRate - pOldParam->fMaxFrameRate / kpOldDlp->fOutputFrameRate; if (fT1 > EPSN || fT1 < -EPSN || fT2 > EPSN || fT2 < -EPSN) { bNeedReset = true; break; } if (kpOldDlp->iHighestTemporalId != kpNewDlp->iHighestTemporalId) { bNeedReset = true; break; } ++ iIndexD; } while (iIndexD < pOldParam->iNumDependencyLayer); } if (bNeedReset) { SParaSetOffsetVariable sTmpPsoVariable[PARA_SET_TYPE]; uint16_t uiTmpIdrPicId;//this is for LTR! memcpy (sTmpPsoVariable, (*ppCtx)->sPSOVector.sParaSetOffsetVariable, (PARA_SET_TYPE)*sizeof (SParaSetOffsetVariable)); // confirmed_safe_unsafe_usage uiTmpIdrPicId = (*ppCtx)->sPSOVector.uiIdrPicId; WelsUninitEncoderExt (ppCtx); /* Update new parameters */ if (WelsInitEncoderExt (ppCtx, pNewParam)) return 1; // reset the scaled spatial picture size (*ppCtx)->pVpp->WelsPreprocessReset (*ppCtx); //if WelsInitEncoderExt succeed //for FLEXIBLE_PARASET_ID memcpy ((*ppCtx)->sPSOVector.sParaSetOffsetVariable, sTmpPsoVariable, (PARA_SET_TYPE)*sizeof (SParaSetOffsetVariable)); // confirmed_safe_unsafe_usage (*ppCtx)->sPSOVector.uiIdrPicId = uiTmpIdrPicId; } else { /* maybe adjustment introduced in bitrate or little settings adjustment and so on.. */ pNewParam->iNumRefFrame = WELS_CLIP3 (pNewParam->iNumRefFrame, MIN_REF_PIC_COUNT, MAX_REFERENCE_PICTURE_COUNT_NUM); pNewParam->iLoopFilterDisableIdc = WELS_CLIP3 (pNewParam->iLoopFilterDisableIdc, 0, 6); pNewParam->iLoopFilterAlphaC0Offset = WELS_CLIP3 (pNewParam->iLoopFilterAlphaC0Offset, -6, 6); pNewParam->iLoopFilterBetaOffset = WELS_CLIP3 (pNewParam->iLoopFilterBetaOffset, -6, 6); pNewParam->iInterLayerLoopFilterDisableIdc = WELS_CLIP3 (pNewParam->iInterLayerLoopFilterDisableIdc, 0, 6); pNewParam->iInterLayerLoopFilterAlphaC0Offset = WELS_CLIP3 (pNewParam->iInterLayerLoopFilterAlphaC0Offset, -6, 6); pNewParam->iInterLayerLoopFilterBetaOffset = WELS_CLIP3 (pNewParam->iInterLayerLoopFilterBetaOffset, -6, 6); pNewParam->fMaxFrameRate = WELS_CLIP3 (pNewParam->fMaxFrameRate, MIN_FRAME_RATE, MAX_FRAME_RATE); // we can not use direct struct based memcpy due some fields need keep unchanged as before pOldParam->fMaxFrameRate = pNewParam->fMaxFrameRate; // maximal frame rate [Hz / fps] pOldParam->iInputCsp = pNewParam->iInputCsp; // color space of input sequence pOldParam->uiIntraPeriod = pNewParam->uiIntraPeriod; // intra period (multiple of GOP size as desired) pOldParam->bEnableSpsPpsIdAddition = pNewParam->bEnableSpsPpsIdAddition; pOldParam->bPrefixNalAddingCtrl = pNewParam->bPrefixNalAddingCtrl; pOldParam->iNumRefFrame = pNewParam->iNumRefFrame; // number of reference frame used /* denoise control */ pOldParam->bEnableDenoise = pNewParam->bEnableDenoise; /* background detection control */ pOldParam->bEnableBackgroundDetection = pNewParam->bEnableBackgroundDetection; /* adaptive quantization control */ pOldParam->bEnableAdaptiveQuant = pNewParam->bEnableAdaptiveQuant; /* int32_t term reference control */ pOldParam->bEnableLongTermReference = pNewParam->bEnableLongTermReference; pOldParam->uiLtrMarkPeriod = pNewParam->uiLtrMarkPeriod; // keep below values unchanged as before pOldParam->bEnableSSEI = pNewParam->bEnableSSEI; pOldParam->bEnableFrameCroppingFlag = pNewParam->bEnableFrameCroppingFlag; // enable frame cropping flag /* Motion search */ /* Deblocking loop filter */ pOldParam->iLoopFilterDisableIdc = pNewParam->iLoopFilterDisableIdc; // 0: on, 1: off, 2: on except for slice boundaries pOldParam->iLoopFilterAlphaC0Offset = pNewParam->iLoopFilterAlphaC0Offset;// AlphaOffset: valid range [-6, 6], default 0 pOldParam->iLoopFilterBetaOffset = pNewParam->iLoopFilterBetaOffset; // BetaOffset: valid range [-6, 6], default 0 pOldParam->iInterLayerLoopFilterDisableIdc = pNewParam->iInterLayerLoopFilterDisableIdc; // Employed based upon inter-layer, same comment as above pOldParam->iInterLayerLoopFilterAlphaC0Offset = pNewParam->iInterLayerLoopFilterAlphaC0Offset; // InterLayerLoopFilterAlphaC0Offset pOldParam->iInterLayerLoopFilterBetaOffset = pNewParam->iInterLayerLoopFilterBetaOffset; // InterLayerLoopFilterBetaOffset /* Rate Control */ pOldParam->bEnableRc = pNewParam->bEnableRc; pOldParam->iRCMode = pNewParam->iRCMode; pOldParam->iTargetBitrate = pNewParam->iTargetBitrate; // overall target bitrate introduced in RC module pOldParam->iPaddingFlag = pNewParam->iPaddingFlag; /* Layer definition */ pOldParam->bPrefixNalAddingCtrl = pNewParam->bPrefixNalAddingCtrl; // d iIndexD = 0; do { SDLayerParam* pOldDlp = &pOldParam->sDependencyLayers[iIndexD]; SDLayerParam* pNewDlp = &pNewParam->sDependencyLayers[iIndexD]; pOldDlp->fInputFrameRate = pNewDlp->fInputFrameRate; // input frame rate pOldDlp->fOutputFrameRate = pNewDlp->fOutputFrameRate; // output frame rate pOldDlp->iSpatialBitrate = pNewDlp->iSpatialBitrate; pOldDlp->uiProfileIdc = pNewDlp->uiProfileIdc; // value of profile IDC (0 for auto-detection) /* Derived variants below */ pOldDlp->iTemporalResolution = pNewDlp->iTemporalResolution; pOldDlp->iDecompositionStages = pNewDlp->iDecompositionStages; memcpy (pOldDlp->uiCodingIdx2TemporalId, pNewDlp->uiCodingIdx2TemporalId, sizeof (pOldDlp->uiCodingIdx2TemporalId)); // confirmed_safe_unsafe_usage ++ iIndexD; } while (iIndexD < pOldParam->iNumDependencyLayer); } /* Any else initialization/reset for rate control here? */ return 0; } int32_t WelsCodeOnePicPartition (sWelsEncCtx* pCtx, SLayerBSInfo* pLayerBsInfo, int32_t* pNalIdxInLayer, int32_t* pLayerSize, int32_t iFirstMbInPartition, // first mb inclusive in partition int32_t iEndMbInPartition, // end mb exclusive in partition int32_t iStartSliceIdx ) { SDqLayer* pCurLayer = pCtx->pCurDqLayer; SSliceCtx* pSliceCtx = pCurLayer->pSliceEncCtx; int32_t iNalLen[MAX_NAL_UNITS_IN_LAYER] = {0}; int32_t iNalIdxInLayer = *pNalIdxInLayer; int32_t iSliceIdx = iStartSliceIdx; const int32_t kiSliceStep = pCtx->iActiveThreadsNum; const int32_t kiPartitionId = iStartSliceIdx % kiSliceStep; int32_t iPartitionBsSize = 0; int32_t iAnyMbLeftInPartition = iEndMbInPartition - iFirstMbInPartition; const EWelsNalUnitType keNalType = pCtx->eNalType; const EWelsNalRefIdc keNalRefIdc = pCtx->eNalPriority; const bool_t kbNeedPrefix = pCtx->bNeedPrefixNalFlag; //init { pSliceCtx->pFirstMbInSlice[iSliceIdx] = iFirstMbInPartition; pCurLayer->pNumSliceCodedOfPartition[kiPartitionId] = 1; // one slice per partition intialized, dynamic slicing inside pCurLayer->pLastMbIdxOfPartition[kiPartitionId] = iEndMbInPartition - 1; } pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId] = 0; while (iAnyMbLeftInPartition > 0) { int32_t iSliceSize = 0; if (iSliceIdx >= pSliceCtx->iMaxSliceNumConstraint) { // insufficient memory in pSliceInLayer[] // TODO: need exception handler for not large enough of MAX_SLICES_NUM related memory usage // No idea about its solution due MAX_SLICES_NUM is fixed lenght in relevent pData structure return 1; } if (kbNeedPrefix) { iPartitionBsSize += AddPrefixNal (pCtx, pLayerBsInfo, &iNalLen[0], &iNalIdxInLayer, keNalType, keNalRefIdc); } WelsLoadNal (pCtx->pOut, keNalType, keNalRefIdc); WelsCodeOneSlice (pCtx, iSliceIdx, keNalType); WelsUnloadNal (pCtx->pOut); iSliceSize = WelsEncodeNalExt (&pCtx->pOut->sNalList[pCtx->pOut->iNalIndex - 1], &pCtx->pCurDqLayer->sLayerInfo.sNalHeaderExt, pCtx->pFrameBs + pCtx->iPosBsBuffer, &iNalLen[iNalIdxInLayer]); pCtx->iPosBsBuffer += iSliceSize; iPartitionBsSize += iSliceSize; pLayerBsInfo->iNalLengthInByte[iNalIdxInLayer] = iSliceSize; #if defined(SLICE_INFO_OUTPUT) fprintf (stderr, "@slice=%-6d sliceType:%c idc:%d size:%-6d\n", iSliceIdx, (pCtx->eSliceType == P_SLICE ? 'P' : 'I'), eNalRefIdc, iSliceSize); #endif//SLICE_INFO_OUTPUT ++ iNalIdxInLayer; iSliceIdx += kiSliceStep; //if uiSliceIdx is not continuous iAnyMbLeftInPartition = iEndMbInPartition - (1 + pCurLayer->pLastCodedMbIdxOfPartition[kiPartitionId]); } *pLayerSize = iPartitionBsSize; *pNalIdxInLayer = iNalIdxInLayer; // slice based packing??? pLayerBsInfo->uiLayerType = VIDEO_CODING_LAYER; pLayerBsInfo->uiSpatialId = pCtx->uiDependencyId; pLayerBsInfo->uiTemporalId = pCtx->uiTemporalId; pLayerBsInfo->uiQualityId = 0; pLayerBsInfo->uiPriorityId = 0; pLayerBsInfo->iNalCount = iNalIdxInLayer; return 0; } } // namespace WelsSVCEnc