shithub: openh264

Download patch

ref: e0ef0cabdd605f5f5809358e1999b0457ba4f3ac
parent: 9d73d273ff81f12fb05c86d21231106ae307b8ce
parent: 06c534d9f2be39138d13beacf64d1c0d56739b8a
author: Licai Guo <licaguo@cisco.com>
date: Thu Mar 20 11:57:50 EDT 2014

Merge pull request #555 from huili2/remove_unused_func

Simplify block zeroing (replace the per-block zero functions with a single memset) and remove unused functions

--- a/codec/build/win32/dec/WelsDecCore.vcproj
+++ b/codec/build/win32/dec/WelsDecCore.vcproj
@@ -349,46 +349,6 @@
 				Filter="*.asm;*.inc"
 				>
 				<File
-					RelativePath="..\..\..\decoder\core\x86\block_add.asm"
-					>
-					<FileConfiguration
-						Name="Release|Win32"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Release|x64"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Debug|Win32"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win32 -DPREFIX -DX86_32 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-					<FileConfiguration
-						Name="Debug|x64"
-						>
-						<Tool
-							Name="VCCustomBuildTool"
-							CommandLine="nasm -I$(InputDir) -I$(InputDir)/../../../common/x86/ -f win64 -O3 -DWIN64 -o $(IntDir)\$(InputName).obj $(InputPath)&#x0D;&#x0A;"
-							Outputs="$(IntDir)\$(InputName).obj"
-						/>
-					</FileConfiguration>
-				</File>
-				<File
 					RelativePath="..\..\..\common\x86\cpuid.asm"
 					>
 					<FileConfiguration
--- a/codec/decoder/core/arm/block_add_neon.S
+++ b/codec/decoder/core/arm/block_add_neon.S
@@ -116,46 +116,6 @@
 WELS_ASM_FUNC_END
 
 
-//	r0 int16_t * block,
-//	r1	int32_t stride
-WELS_ASM_FUNC_BEGIN WelsResBlockZero16x16_neon// can use for 256*sizeof(int16_t)
-	push		{r2}
-	mov			r2, #16
-// each row 16 elements, 16*sizeof(int16_t)
-//	memset(ptr_dest, 0, 16*sizeof(int16_t));
-//	ptr_dest += stride;
-	lsl			r1, r1, #1	// r1 = 2*r1
-	veor.i16	q0, q0, q0
-	veor.i16	q1, q1, q1
-
-block_zero_16x16_luma_loop:
-	vst1.i16	{q0, q1}, [r0], r1
-	subs		r2,	r2, #2
-	vst1.i16	{q0, q1}, [r0], r1
-	bne			block_zero_16x16_luma_loop
-
-	pop		{r2}
-WELS_ASM_FUNC_END
-
-WELS_ASM_FUNC_BEGIN WelsResBlockZero8x8_neon// can use for 64*sizeof(int16_t)
-	push		{r2}
-	mov			r2, #8
-// each row 8 elements, 8*sizeof(int16_t)
-//	memset(ptr_dest, 0, 8*sizeof(int16_t));
-//	ptr_dest += stride;
-	lsl			r1, r1, #1
-	veor.i16	q0, q0, q0
-
-block_zero_8x8_chma_loop:
-	vst1.i16	{q0}, [r0], r1
-	subs		r2,	r2, #2
-	vst1.i16	{q0}, [r0], r1
-	bne			block_zero_8x8_chma_loop
-
-	pop		{r2}
-WELS_ASM_FUNC_END
-
-
 //	uint8_t *pred, const int32_t stride, int16_t *rs
 WELS_ASM_FUNC_BEGIN IdctResAddPred_neon
 
--- a/codec/decoder/core/inc/decode_slice.h
+++ b/codec/decoder/core/inc/decode_slice.h
@@ -37,8 +37,6 @@
 
 namespace WelsDec {
 
-void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal);
-
 int32_t WelsActualDecodeMbCavlcISlice (PWelsDecoderContext pCtx);
 int32_t WelsDecodeMbCavlcISlice (PWelsDecoderContext pCtx, PNalUnit pNalCur);
 
@@ -66,22 +64,13 @@
 #endif//__cplusplus
 
 #if defined(HAVE_NEON)
-void WelsResBlockZero16x16_neon(int16_t* pBlock, int32_t iStride);
-void WelsResBlockZero8x8_neon(int16_t* pBlock, int32_t iStride);
 void SetNonZeroCount_neon(int16_t* pBlock, int8_t* pNonZeroCount);
 #endif
 
-#ifdef  X86_ASM
-void WelsResBlockZero16x16_sse2 (int16_t* pBlock, int32_t iStride);
-void WelsResBlockZero8x8_sse2 (int16_t* pBlock, int32_t iStride);
-#endif
-
 #ifdef __cplusplus
 }
 #endif//__cplusplus
 
-void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride);
-void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride);
 void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount);
 
 void WelsBlockFuncInit (SBlockFunc* pFunc,  int32_t iCpu);
--- a/codec/decoder/core/inc/decoder_context.h
+++ b/codec/decoder/core/inc/decoder_context.h
@@ -133,15 +133,9 @@
   PChromaDeblockingEQ4Func  pfChromaDeblockingEQ4Hor;
 } SDeblockingFunc, *PDeblockingFunc;
 
-typedef void (*PWelsBlockAddStrideFunc) (uint8_t* pDest, uint8_t* pPred, int16_t* pRes, int32_t iPredStride,
-    int32_t iResStride);
-typedef void (*PWelsBlockZeroFunc) (int16_t* pBlock, int32_t iStride);
 typedef void (*PWelsNonZeroCountFunc) (int16_t* pBlock, int8_t* pNonZeroCount);
-typedef void (*PWelsSimpleIdct4x4AddFunc) (int16_t* pDest, int16_t* pSrc, int32_t iStride);
 
 typedef  struct  TagBlockFunc {
-  PWelsBlockZeroFunc			pWelsBlockZero16x16Func;
-  PWelsBlockZeroFunc			pWelsBlockZero8x8Func;
   PWelsNonZeroCountFunc		pWelsSetNonZeroCountFunc;
 } SBlockFunc;
 
--- a/codec/decoder/core/src/decode_slice.cpp
+++ b/codec/decoder/core/src/decode_slice.cpp
@@ -842,9 +842,7 @@
     uiCbpL = pCurLayer->pCbp[iMbXy] & 15;
   }
 
-  pCtx->sBlockFunc.pWelsBlockZero16x16Func (pCurLayer->pScaledTCoeff[iMbXy], 16);
-  pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256, 8);
-  pCtx->sBlockFunc.pWelsBlockZero8x8Func (pCurLayer->pScaledTCoeff[iMbXy] + 256 + 64, 8);
+  memset(pCurLayer->pScaledTCoeff[iMbXy], 0, MB_COEFF_LIST_SIZE * sizeof(int16_t));
 
   ST32 (&pCurLayer->pNzc[iMbXy][0], 0);
   ST32 (&pCurLayer->pNzc[iMbXy][4], 0);
@@ -1043,42 +1041,14 @@
   return 0;
 }
 
-void WelsBlockInit (int16_t* pBlock, int32_t iWidth, int32_t iHeight, int32_t iStride, uint8_t uiVal) {
-  int32_t i;
-  int16_t* pDst = pBlock;
-
-  for (i = 0; i < iHeight; i++) {
-    memset (pDst, uiVal, iWidth * sizeof (int16_t));
-    pDst += iStride;
-  }
-}
-
 void WelsBlockFuncInit (SBlockFunc*   pFunc,  int32_t iCpu) {
-  pFunc->pWelsBlockZero16x16Func		= WelsBlockZero16x16_c;
-  pFunc->pWelsBlockZero8x8Func	    = WelsBlockZero8x8_c;
   pFunc->pWelsSetNonZeroCountFunc	    = SetNonZeroCount_c;
 
-#ifdef  X86_ASM
-  if (iCpu & WELS_CPU_SSE2) {
-    pFunc->pWelsBlockZero16x16Func		= WelsResBlockZero16x16_sse2;
-    pFunc->pWelsBlockZero8x8Func	    = WelsResBlockZero8x8_sse2;
-  }
-#endif
-
 #ifdef	HAVE_NEON
   if ( iCpu & WELS_CPU_NEON ) {
-    pFunc->pWelsBlockZero16x16Func		= WelsResBlockZero16x16_neon;
-    pFunc->pWelsBlockZero8x8Func		= WelsResBlockZero8x8_neon;
     pFunc->pWelsSetNonZeroCountFunc		= SetNonZeroCount_neon;
   }
 #endif
-}
-void WelsBlockZero16x16_c (int16_t* pBlock, int32_t iStride) {
-  WelsBlockInit (pBlock, 16, 16, iStride, 0);
-}
-
-void WelsBlockZero8x8_c (int16_t* pBlock, int32_t iStride) {
-  WelsBlockInit (pBlock, 8, 8, iStride, 0);
 }
 
 void SetNonZeroCount_c (int16_t* pBlock, int8_t* pNonZeroCount) {
--- a/codec/decoder/core/x86/block_add.asm
+++ /dev/null
@@ -1,151 +1,0 @@
-;*!
-;* \copy
-;*     Copyright (c)  2009-2013, Cisco Systems
-;*     All rights reserved.
-;*
-;*     Redistribution and use in source and binary forms, with or without
-;*     modification, are permitted provided that the following conditions
-;*     are met:
-;*
-;*        * Redistributions of source code must retain the above copyright
-;*          notice, this list of conditions and the following disclaimer.
-;*
-;*        * Redistributions in binary form must reproduce the above copyright
-;*          notice, this list of conditions and the following disclaimer in
-;*          the documentation and/or other materials provided with the
-;*          distribution.
-;*
-;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
-;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
-;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
-;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
-;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-;*     POSSIBILITY OF SUCH DAMAGE.
-;*
-;*
-;*  block_add.asm
-;*
-;*  Abstract
-;*      add block
-;*
-;*  History
-;*      09/21/2009 Created
-;*
-;*
-;*************************************************************************/
-
-%include  "asm_inc.asm"
-
-;*******************************************************************************
-; Code
-;*******************************************************************************
-
-SECTION .text
-
-
-;*******************************************************************************
-;  void WelsResBlockZero16x16_sse2(int16_t* pBlock,int32_t iStride)
-;*******************************************************************************
-WELS_EXTERN   WelsResBlockZero16x16_sse2
-        %assign push_num 0
-        LOAD_2_PARA
-        PUSH_XMM 8
-	SIGN_EXTENSION r1, r1d
-        lea 	r1, 	[r1*2]
-        lea 	r2,	[r1*3]
-
-	pxor     xmm7,       xmm7
-
-    ; four  lines
-	movdqa   [r0],      xmm7
-	movdqa   [r0+10h],  xmm7
-
-	movdqa   [r0+r1],  xmm7
-	movdqa   [r0+r1+10h],     xmm7
-
-    movdqa   [r0+r1*2],   xmm7
-	movdqa   [r0+r1*2+10h],   xmm7
-
-	movdqa   [r0+r2],     xmm7
-	movdqa   [r0+r2+10h],     xmm7
-
-    ;  four lines
-	lea      r0,       [r0+r1*4]
-	movdqa   [r0],      xmm7
-	movdqa   [r0+10h],  xmm7
-
-	movdqa   [r0+r1],  xmm7
-	movdqa   [r0+r1+10h],     xmm7
-
-    movdqa   [r0+r1*2],   xmm7
-	movdqa   [r0+r1*2+10h],   xmm7
-
-	movdqa   [r0+r2],     xmm7
-	movdqa   [r0+r2+10h],     xmm7
-
-	;  four lines
-	lea      r0,       [r0+r1*4]
-	movdqa   [r0],      xmm7
-	movdqa   [r0+10h],  xmm7
-
-	movdqa   [r0+r1],  xmm7
-	movdqa   [r0+r1+10h],     xmm7
-
-    movdqa   [r0+r1*2],   xmm7
-	movdqa   [r0+r1*2+10h],   xmm7
-
-	movdqa   [r0+r2],     xmm7
-	movdqa   [r0+r2+10h],     xmm7
-
-	;  four lines
-	lea      r0,       [r0+r1*4]
-	movdqa   [r0],      xmm7
-	movdqa   [r0+10h],  xmm7
-
-	movdqa   [r0+r1],  xmm7
-	movdqa   [r0+r1+10h],     xmm7
-
-    movdqa   [r0+r1*2],   xmm7
-	movdqa   [r0+r1*2+10h],   xmm7
-
-	movdqa   [r0+r2],     xmm7
-	movdqa   [r0+r2+10h],     xmm7
-
-	POP_XMM
-	ret
-
-
-;*******************************************************************************
-;  void WelsResBlockZero8x8_sse2(int16_t * pBlock, int32_t iStride)
-;*******************************************************************************
-WELS_EXTERN   WelsResBlockZero8x8_sse2
-	  %assign push_num 0
-          LOAD_2_PARA
-          PUSH_XMM 8
-	  SIGN_EXTENSION r1, r1d
-	  lea       r1,     [r1*2]
-	  lea       r2,     [r1*3]
-
-	  pxor      xmm7,          xmm7
-
-	  movdqa    [r0],         xmm7
-	  movdqa    [r0+r1],     xmm7
-	  movdqa    [r0+r1*2],   xmm7
-	  movdqa    [r0+r2],     xmm7
-
-	  lea       r0,     [r0+r1*4]
-	  movdqa    [r0],         xmm7
-	  movdqa    [r0+r1],     xmm7
-	  movdqa    [r0+r1*2],   xmm7
-	  movdqa    [r0+r2],     xmm7
-
-
-	  POP_XMM
-	  ret
-
--- a/codec/decoder/targets.mk
+++ b/codec/decoder/targets.mk
@@ -27,7 +27,6 @@
 
 ifeq ($(ASM_ARCH), x86)
 DECODER_ASM_SRCS=\
-	$(DECODER_SRCDIR)/core/x86/block_add.asm\
 	$(DECODER_SRCDIR)/core/x86/dct.asm\
 	$(DECODER_SRCDIR)/core/x86/intra_pred.asm\