shithub: openh264

Download patch

ref: 914650abc6168adfc7e73c834dcd41bbcf546bdc
parent: e413ed20a2c3346e7097c45f7cb5f2bb3b9c0849
parent: 66c39a4fd45a0e25293c91f096f5bed202ea2688
author: Ethan Hugg <ethanhugg@gmail.com>
date: Wed Jan 29 10:17:44 EST 2014

Merge pull request #255 from mstorsjo/unify-stack-align-macros

Remove the array_stack_align.h header from the encoder lib

--- a/codec/build/win32/enc/WelsEncCore.vcproj
+++ b/codec/build/win32/enc/WelsEncCore.vcproj
@@ -1433,10 +1433,6 @@
 			Filter="h;hpp;hxx;hm;inl"
 			>
 			<File
-				RelativePath="..\..\..\encoder\core\inc\array_stack_align.h"
-				>
-			</File>
-			<File
 				RelativePath="..\..\..\encoder\core\inc\as264_common.h"
 				>
 			</File>
--- a/codec/common/deblocking_common.cpp
+++ b/codec/common/deblocking_common.cpp
@@ -184,7 +184,7 @@
 #ifdef X86_ASM
 extern "C" {
   void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) {
-    FORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
+    ENFORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
 
     DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
     DeblockLumaLt4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc);
@@ -192,7 +192,7 @@
   }
 
   void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) {
-    FORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
+    ENFORCE_STACK_ALIGN_1D (uint8_t,  uiBuf,   16 * 8, 16);
 
     DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]);
     DeblockLumaEq4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta);
--- a/codec/common/macros.h
+++ b/codec/common/macros.h
@@ -45,7 +45,7 @@
 #include "typedefs.h"
 
 /*
-* FORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack
+* ENFORCE_STACK_ALIGN_1D: force 1 dimension local data aligned in stack
 * _tp: type
 * _nm: var name
 * _sz: size
@@ -52,9 +52,9 @@
 * _al: align bytes
 * auxiliary var: _nm ## _tEmP
 */
-#define FORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
+#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
 	_tp _nm ## _tEmP[(_sz)+(_al)-1]; \
-	_tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp))
+	_tp *_nm = _nm ## _tEmP + ((_al)-1) - (((uintptr_t)(_nm ## _tEmP + ((_al)-1)) & ((_al)-1))/sizeof(_tp));
 
 
 #define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
--- a/codec/decoder/core/src/deblocking.cpp
+++ b/codec/decoder/core/src/deblocking.cpp
@@ -145,7 +145,7 @@
 
 void_t inline DeblockingBSInsideMBAvsbase (int8_t* pNnzTab, uint8_t nBS[2][4][4], int32_t iLShiftFactor) {
   uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
-  FORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx3, 4, 4);
 
   uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
   uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
@@ -181,7 +181,7 @@
     int32_t iMbXy) {
   uint32_t uiNnz32b0, uiNnz32b1, uiNnz32b2, uiNnz32b3;
   int8_t* iRefIndex = pCurDqLayer->pRefIndex[LIST_0][iMbXy];
-  FORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBsx4, 4, 4);
 
   uiNnz32b0 = * (uint32_t*) (pNnzTab + 0);
   uiNnz32b1 = * (uint32_t*) (pNnzTab + 4);
@@ -269,7 +269,7 @@
   int32_t iIndexA;
   int32_t iAlpha;
   int32_t iBeta;
-  FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
 
   GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
                           iBeta);
@@ -286,7 +286,7 @@
   int32_t  iIndexA;
   int32_t  iAlpha;
   int32_t  iBeta;
-  FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
 
   GET_ALPHA_BETA_FROM_QP (pFilter->iLumaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
                           iBeta);
@@ -331,7 +331,7 @@
   int32_t iIndexA;
   int32_t iAlpha;
   int32_t iBeta;
-  FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
 
   GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
                           iBeta);
@@ -347,7 +347,7 @@
   int32_t iIndexA;
   int32_t iAlpha;
   int32_t iBeta;
-  FORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
+  ENFORCE_STACK_ALIGN_1D (int8_t, tc, 4, 16);
 
   GET_ALPHA_BETA_FROM_QP (pFilter->iChromaQP, pFilter->iSliceAlphaC0Offset, pFilter->iSliceBetaOffset, iIndexA, iAlpha,
                           iBeta);
@@ -483,8 +483,8 @@
   int32_t  iCurQp;
   int32_t  iIndexA, iAlpha, iBeta;
 
-  FORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
+  ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
 
   pDestY  = pFilter->pCsData[0] + ((iMbY * iLineSize + iMbX) << 4);
   iCurQp  = pCurDqLayer->pLumaQp[iMbXyIndex];
@@ -531,8 +531,8 @@
   int32_t  iCurQp;
   int32_t  iIndexA, iAlpha, iBeta;
 
-  FORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
+  ENFORCE_STACK_ALIGN_1D (int8_t,  iTc,   4, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiBSx4, 4, 4);
 
   pDestCb = pFilter->pCsData[1] + ((iMbY * iLineSize + iMbX) << 3);
   pDestCr = pFilter->pCsData[2] + ((iMbY * iLineSize + iMbX) << 3);
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -492,7 +492,7 @@
   int32_t iNMbMode, i;
   uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
 
-  FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
 
   pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;
   pCurLayer->pResidualPredFlag[iMbXy] = pSlice->sSliceHeaderExt.bDefaultResidualPredFlag;
@@ -553,7 +553,7 @@
     memset (pCurLayer->pNzc[iMbXy], 16, sizeof (pCurLayer->pNzc[iMbXy]));   //JVT-x201wcm1.doc, page229, 2009.10.23
     return 0;
   } else if (0 == uiMbType) { //reference to JM
-    FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+    ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
     pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
     pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
     if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
@@ -770,7 +770,7 @@
   int32_t iNMbMode, i;
   uint32_t uiMbType = 0, uiCbp = 0, uiCbpL = 0, uiCbpC = 0;
 
-  FORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pNonZeroCount, 48, 16);
   pCurLayer->pInterPredictionDoneFlag[iMbXy] = 0;//2009.10.23
 
   uiMbType = BsGetUe (pBs);
@@ -859,7 +859,7 @@
       return 0;
     } else {
       if (0 == uiMbType) {
-        FORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
+        ENFORCE_STACK_ALIGN_1D (int8_t, pIntraPredMode, 48, 16);
         pCurLayer->pMbType[iMbXy] = MB_TYPE_INTRA4x4;
         pCtx->pFillInfoCacheIntra4x4Func (&sNeighAvail, pNonZeroCount, pIntraPredMode, pCurLayer);
         if (pCtx->pParseIntra4x4ModeFunc (&sNeighAvail, pIntraPredMode, pBs, pCurLayer)) {
--- a/codec/decoder/core/src/mc.cpp
+++ b/codec/decoder/core/src/mc.cpp
@@ -420,7 +420,7 @@
 
 static inline void_t McHorVer01_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
     PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
@@ -434,7 +434,7 @@
 }
 static inline void_t McHorVer03_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
     PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + iSrcStride, iSrcStride, pTmp, 16, iHeight);
@@ -448,7 +448,7 @@
 }
 static inline void_t McHorVer10_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pTmp, 16, iHeight);
     PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc, iSrcStride, pTmp, 16, iHeight);
@@ -462,8 +462,8 @@
 }
 static inline void_t McHorVer11_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
     McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
@@ -480,8 +480,8 @@
 }
 static inline void_t McHorVer12_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer02WidthEq16_sse2 (pSrc, iSrcStride, pVerTmp, 16, iHeight);
     McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
@@ -498,8 +498,8 @@
 }
 static inline void_t McHorVer13_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
     McHorVer02WidthEq16_sse2 (pSrc,            iSrcStride, pVerTmp, 16, iHeight);
@@ -516,8 +516,8 @@
 }
 static inline void_t McHorVer21_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
     McHorVer22WidthEq16_sse2 (pSrc, iSrcStride, pCtrTmp, 16, iHeight);
@@ -534,8 +534,8 @@
 }
 static inline void_t McHorVer23_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
     McHorVer22WidthEq16_sse2 (pSrc,            iSrcStride, pCtrTmp, 16, iHeight);
@@ -552,7 +552,7 @@
 }
 static inline void_t McHorVer30_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc, iSrcStride, pHorTmp, 16, iHeight);
     PixelAvgWidthEq16_sse2 (pDst, iDstStride, pSrc + 1, iSrcStride, pHorTmp, 16, iHeight);
@@ -566,8 +566,8 @@
 }
 static inline void_t McHorVer31_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc,   iSrcStride, pHorTmp, 16, iHeight);
     McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
@@ -584,8 +584,8 @@
 }
 static inline void_t McHorVer32_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pCtrTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer02WidthEq16_sse2 (pSrc + 1, iSrcStride, pVerTmp, 16, iHeight);
     McHorVer22WidthEq16_sse2 (pSrc,   iSrcStride, pCtrTmp, 16, iHeight);
@@ -602,8 +602,8 @@
 }
 static inline void_t McHorVer33_sse2 (uint8_t* pSrc, int32_t iSrcStride, uint8_t* pDst, int32_t iDstStride,
                                       int32_t iWidth, int32_t iHeight) {
-  FORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
-  FORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pHorTmp, 256, 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, pVerTmp, 256, 16);
   if (iWidth == 16) {
     McHorVer20WidthEq16_sse2 (pSrc + iSrcStride, iSrcStride, pHorTmp, 16, iHeight);
     McHorVer02WidthEq16_sse2 (pSrc + 1,          iSrcStride, pVerTmp, 16, iHeight);
@@ -666,4 +666,4 @@
 #endif //(X86_ASM)
 }
 
-} // namespace WelsDec
\ No newline at end of file
+} // namespace WelsDec
--- a/codec/decoder/core/src/rec_mb.cpp
+++ b/codec/decoder/core/src/rec_mb.cpp
@@ -237,7 +237,7 @@
   uint8_t* pDstV = pMCRefMem->pDstV;
   bool_t bExpand = false;
 
-  FORCE_STACK_ALIGN_1D (uint8_t, uiExpandBuf, (PADDING_LENGTH + 6) * (PADDING_LENGTH + 6), 16);
+  ENFORCE_STACK_ALIGN_1D (uint8_t, uiExpandBuf, (PADDING_LENGTH + 6) * (PADDING_LENGTH + 6), 16);
 
   if (iFullMVx & 0x07) {
     iExpandWidth -= 3;
--- a/codec/encoder/core/inc/array_stack_align.h
+++ /dev/null
@@ -1,121 +1,0 @@
-/*!
- * \copy
- *     Copyright (c)  2011-2013, Cisco Systems
- *     All rights reserved.
- *
- *     Redistribution and use in source and binary forms, with or without
- *     modification, are permitted provided that the following conditions
- *     are met:
- *
- *        * Redistributions of source code must retain the above copyright
- *          notice, this list of conditions and the following disclaimer.
- *
- *        * Redistributions in binary form must reproduce the above copyright
- *          notice, this list of conditions and the following disclaimer in
- *          the documentation and/or other materials provided with the
- *          distribution.
- *
- *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- *     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- *     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
- *     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
- *     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
- *     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
- *     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- *     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
- *     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- *     POSSIBILITY OF SUCH DAMAGE.
- *
- *
- * \file		array_stack_align.h
- *
- * \brief	promised alignment of array pData declaration on stack
- *			multidimensional array can be extended if applicable need
- *
- * \date		8/8/2011 Created
- *			8/12/2011 functionality implementation for multidimensional array
- *			8/26/2011 better solution with reducing extra memory used,
- *						stack size is adaptively reduced by _tp & _al
- *
- *************************************************************************************
- */
-#ifndef ARRAY_STACK_ALIGN_H__
-#define ARRAY_STACK_ALIGN_H__
-
-#include <assert.h>
-#include "typedefs.h"
-
-/*
- * ENFORCE_STACK_ALIGN_1D: force 1 dimension local pData aligned in stack
- * _tp: type
- * _nm: var name
- * _sz: size
- * _al: align bytes
- * auxiliary var: _nm ## _tEmP
- * NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro
- */
-
-//#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
-//_tp _nm ## _tEmP[(_sz)+(_al)-1]; \
-//_tp *_nm = _nm ## _tEmP + ((_al)-1); \
-//_nm -= (((int32_t)_nm & ((_al)-1))/sizeof(_tp));
-
-/* Another better solution with reducing extra memory used */
-#define ENFORCE_STACK_ALIGN_1D(_tp, _nm, _sz, _al) \
-assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\
-_tp _nm ## _tEmP[(_sz)+(_al)/sizeof(_tp)-1]; \
-_tp *_nm = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
-_nm -= (((uintptr_t)_nm & ((_al)-1))/sizeof(_tp));
-
-/*
- * ENFORCE_STACK_ALIGN_2D: force 2 dimension local pData aligned in stack
- * _tp: type
- * _nm: var name
- * _cx, _cy: size in x, y dimension
- * _al: align bytes
- * auxiliary var: _nm ## _tEmP, _nm ## _tEmP_al
- * NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro
- */
-
-//#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
-//_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)-1]; \
-//_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)-1); \
-//_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-//_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al;
-
-/* Another better solution with reducing extra memory used */
-#define ENFORCE_STACK_ALIGN_2D(_tp, _nm, _cx, _cy, _al) \
-assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\
-_tp _nm ## _tEmP[(_cx)*(_cy)+(_al)/sizeof(_tp)-1]; \
-_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
-_nm ## _tEmP_al -= (((uintptr_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-_tp (*_nm)[(_cy)] = (_tp (*)[(_cy)])_nm ## _tEmP_al;
-
-/*
- * ENFORCE_STACK_ALIGN_3D: force 3 dimension local pData aligned in stack
- * _tp: type
- * _nm: var name
- * _cx, _cy, _cz: size in x, y, z dimension
- * _al: align bytes
- * auxiliary var: _nm ## _tEmP, _nm ## _tEmP_al
- * NOTE: _al should be power-of-2 and >= sizeof(_tp), before considering to use such macro
- */
-
-//#define ENFORCE_STACK_ALIGN_3D(_tp, _nm, _cx, _cy, _cz, _al) \
-//_tp _nm ## _tEmP[(_cx)*(_cy)*(_cz)+(_al)-1]; \
-//_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)-1); \
-//_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-//_tp (*_nm)[(_cy)][(_cz)] = (_tp (*)[(_cy)][(_cz)])_nm ## _tEmP_al;
-
-/* Another better solution with reducing extra memory used */
-#define ENFORCE_STACK_ALIGN_3D(_tp, _nm, _cx, _cy, _cz, _al) \
-assert( ((_al) && !((_al) & ((_al) - 1))) && ((_al) >= sizeof(_tp)) ); /*_al should be power-of-2 and >= sizeof(_tp)*/\
-_tp _nm ## _tEmP[(_cx)*(_cy)*(_cz)+(_al)/sizeof(_tp)-1]; \
-_tp *_nm ## _tEmP_al = _nm ## _tEmP + ((_al)/sizeof(_tp)-1); \
-_nm ## _tEmP_al -= (((int32_t)_nm ## _tEmP_al & ((_al)-1))/sizeof(_tp)); \
-_tp (*_nm)[(_cy)][(_cz)] = (_tp (*)[(_cy)][(_cz)])_nm ## _tEmP_al;
-
-#endif//ARRAY_STACK_ALIGN_H__
-
--- a/codec/encoder/core/src/deblocking.cpp
+++ b/codec/encoder/core/src/deblocking.cpp
@@ -40,7 +40,6 @@
 
 #include "deblocking.h"
 #include "cpu_core.h"
-#include "array_stack_align.h"
 
 namespace WelsSVCEnc {
 
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -51,7 +51,6 @@
 #include "ref_list_mgr_svc.h"
 #include "ls_defines.h"
 #include "crt_util_safe_x.h"	// Safe CRT routines like utils for cross platforms
-#include "array_stack_align.h"
 #if defined(MT_ENABLED)
 #include "slice_multi_threading.h"
 #endif//MT_ENABLED
--- a/codec/encoder/core/src/get_intra_predictor.cpp
+++ b/codec/encoder/core/src/get_intra_predictor.cpp
@@ -42,7 +42,6 @@
 #include "ls_defines.h"
 #include "cpu_core.h"
 #include "get_intra_predictor.h"
-#include "array_stack_align.h"
 
 namespace WelsSVCEnc {
 #define I4x4_COUNT 4
--- a/codec/encoder/core/src/mc.cpp
+++ b/codec/encoder/core/src/mc.cpp
@@ -40,7 +40,6 @@
 
 #include "mc.h"
 #include "cpu_core.h"
-#include "array_stack_align.h"
 
 namespace WelsSVCEnc {
 /*------------------weight for chroma fraction pixel interpolation------------------*/
--- a/codec/encoder/core/src/md.cpp
+++ b/codec/encoder/core/src/md.cpp
@@ -42,7 +42,6 @@
 #include "md.h"
 #include "cpu_core.h"
 #include "svc_enc_golomb.h"
-#include "array_stack_align.h"
 
 namespace WelsSVCEnc {
 #define INTRA_VARIANCE_SAD_THRESHOLD 150
--- a/codec/encoder/core/src/set_mb_syn_cavlc.cpp
+++ b/codec/encoder/core/src/set_mb_syn_cavlc.cpp
@@ -41,7 +41,6 @@
 #include "set_mb_syn_cavlc.h"
 #include "vlc_encoder.h"
 #include "cpu_core.h"
-#include "array_stack_align.h"
 
 namespace WelsSVCEnc {
 SCoeffFunc    sCoeffFunc;
--- a/codec/encoder/core/src/svc_encode_mb.cpp
+++ b/codec/encoder/core/src/svc_encode_mb.cpp
@@ -42,7 +42,6 @@
 #include "encode_mb_aux.h"
 #include "decode_mb_aux.h"
 #include "ls_defines.h"
-#include "array_stack_align.h"
 
 namespace WelsSVCEnc {
 void WelsDctMb (int16_t* pRes, uint8_t* pEncMb, int32_t iEncStride, uint8_t* pBestPred, PDctFunc pfDctFourT4) {
--- a/codec/encoder/core/src/svc_motion_estimate.cpp
+++ b/codec/encoder/core/src/svc_motion_estimate.cpp
@@ -41,7 +41,6 @@
 
 
 #include "svc_motion_estimate.h"
-#include "array_stack_align.h"
 
 namespace WelsSVCEnc {
 /*!