; shithub: openh264
;
; ref: e307cbb6a1c426e2dbb23054d02d6ebdaaa559c7
; dir: /codec/decoder/core/asm/dct.asm/
;
; View raw version
;*!
;* \copy
;*     Copyright (c)  2009-2013, Cisco Systems
;*     All rights reserved.
;*
;*     Redistribution and use in source and binary forms, with or without
;*     modification, are permitted provided that the following conditions
;*     are met:
;*
;*        * Redistributions of source code must retain the above copyright
;*          notice, this list of conditions and the following disclaimer.
;*
;*        * Redistributions in binary form must reproduce the above copyright
;*          notice, this list of conditions and the following disclaimer in
;*          the documentation and/or other materials provided with the
;*          distribution.
;*
;*     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;*     "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;*     LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
;*     FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
;*     COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
;*     INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
;*     BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
;*     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
;*     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
;*     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
;*     ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
;*     POSSIBILITY OF SUCH DAMAGE.
;*
;*
;*  dct.asm
;*
;*  Abstract
;*      IdctResAddPred_mmx
;*
;*  History
;*      8/4/2009 Created
;*
;*
;*************************************************************************/

%include "asm_inc.asm"

BITS 32

;*******************************************************************************
; Macros and other preprocessor constants
;*******************************************************************************
;-------------------------------------------------------------------------------
; MMX_SumSubDiv2  a, b, out
;   Operates independently on each 16-bit lane (wrap-around add/sub,
;   arithmetic right shift):
;       out = a + (b >> 1)
;       a   = (a >> 1) - b
;   b is left unchanged; whatever out held on entry is discarded.
;-------------------------------------------------------------------------------
%macro MMX_SumSubDiv2 3
    movq        %3, %2              ; out = b
    psraw       %3, 1               ; out = b >> 1
    paddw       %3, %1              ; out = (b >> 1) + a, taken before a is halved
    psraw       %1, 1               ; a = a >> 1
    psubw       %1, %2              ; a = (a >> 1) - b
%endmacro

;-------------------------------------------------------------------------------
; MMX_SumSub  a, b, tmp
;   Operates independently on each 16-bit lane (wrap-around add/sub):
;       a   = a + b
;       b   = b - a        (using the value a had on entry)
;       tmp = value b had on entry
;-------------------------------------------------------------------------------
%macro MMX_SumSub 3
    movq        %3, %2              ; tmp = b, saved so the sum can still be formed
    psubw       %2, %1              ; b = b - a
    paddw       %1, %3              ; a = a + saved b
%endmacro

;-------------------------------------------------------------------------------
; MMX_IDCT  out0, x3, x1, x2, x0, tmp
;   One 4-point butterfly pass applied to every 16-bit lane, built from
;   MMX_SumSub and MMX_SumSubDiv2.  With the inputs named as above:
;       a = x0 + x2
;       b = x0 - x2
;       c = (x1 >> 1) - x3
;       d = x1 + (x3 >> 1)
;   On exit:
;       arg1 = a + d        (its value on entry is ignored)
;       arg3 = b + c
;       arg5 = b - c
;       arg4 = a - d
;       arg2 is unchanged, arg6 is scratch.
;-------------------------------------------------------------------------------
%macro MMX_IDCT 6
    MMX_SumSub      %4, %5, %6      ; arg4 = a, arg5 = b
    MMX_SumSubDiv2  %3, %2, %1      ; arg1 = d, arg3 = c
    MMX_SumSub      %1, %4, %6      ; arg1 = a + d, arg4 = a - d
    MMX_SumSub      %3, %5, %6      ; arg3 = b + c, arg5 = b - c
%endmacro


;-------------------------------------------------------------------------------
; MMX_StoreDiff4P  res, tmp, round, hi, mem
;   res   : register of 16-bit values; overwritten
;   tmp   : scratch register
;   round : register added to res before the shift
;   hi    : register whose low bytes are interleaved as the high byte of each
;           unpacked word (the caller in this file passes the register it
;           prepared with WELS_Zero)
;   mem   : memory operand; 4 bytes are read from it and 4 bytes written back
;
;   Per lane:  mem = pack_unsigned_saturate( ((res + round) >> 6) +sat mem )
;-------------------------------------------------------------------------------
%macro MMX_StoreDiff4P 5
    movd        %2, %5              ; tmp = 4 bytes currently at mem
    punpcklbw   %2, %4              ; widen those bytes to words, high bytes from hi
    paddw       %1, %3              ; res += round
    psraw       %1, 6               ; res >>= 6 (arithmetic)
    paddsw      %1, %2              ; res += widened bytes, signed saturation
    packuswb    %1, %2              ; clamp words to unsigned bytes; low dword is the result
    movd        %5, %1              ; write the 4 result bytes back to mem
%endmacro

;*******************************************************************************
; Code
;*******************************************************************************

SECTION .text

WELS_EXTERN IdctResAddPred_mmx

ALIGN 16
;*******************************************************************************
;   void_t __cdecl IdctResAddPred_mmx( uint8_t *pPred, const int32_t kiStride, int16_t *pRs )
;
;   32-bit routine; the three arguments are read from the stack at
;   esp+4, esp+8 and esp+12.
;
;   Loads 32 bytes (four quadwords) from pRs, runs two MMX_IDCT passes, each
;   preceded by MMX_Trans4x4W, then combines the result with four rows of
;   four bytes starting at pPred (rows are kiStride bytes apart) and writes
;   those bytes back in place.  pRs is only read.
;
;   Registers written here: eax, ecx, edx, mm0-mm4, mm6, mm7.
;   NOTE(review): MMX_Trans4x4W, WELS_Zero and WELS_DW32 are not defined in
;   this file (presumably asm_inc.asm); confirm they touch nothing else.
;   MMX state is cleared with emms before returning.
;*******************************************************************************

IdctResAddPred_mmx:

%define pushsize    0
%define pPred       esp + pushsize + 4
%define kiStride    esp + pushsize + 8
%define pRs         esp + pushsize + 12

    ; Fetch the stack arguments.
    mov         edx, [pPred]                ; edx = pPred
    mov         ecx, [kiStride]             ; ecx = kiStride
    mov         eax, [pRs]                  ; eax = pRs

    ; Load the 4x4 block of 16-bit values, one quadword at a time.
    movq        mm0, [eax]
    movq        mm1, [eax + 8]
    movq        mm2, [eax + 16]
    movq        mm3, [eax + 24]

    ; First pass: presumably a 4x4 word transpose (verify in asm_inc.asm),
    ; followed by the butterfly.  Results land in mm1, mm3, mm0, mm4.
    MMX_Trans4x4W   mm0, mm1, mm2, mm3, mm4
    MMX_IDCT        mm1, mm2, mm3, mm4, mm0, mm6

    ; Second pass on the first pass's output.
    ; Results land in mm3, mm4, mm1, mm2; mm0 is free afterwards.
    MMX_Trans4x4W   mm1, mm3, mm0, mm4, mm2
    MMX_IDCT        mm3, mm0, mm4, mm2, mm1, mm6

    ; Constants for the store step.
    WELS_Zero       mm7                     ; presumably all-zero; used for byte->word widening
    WELS_DW32       mm6                     ; presumably 32 in every word (rounding before >> 6) - confirm

    ; Add each result row to the bytes at pPred and store them back.
    MMX_StoreDiff4P mm3, mm0, mm6, mm7, [edx]
    MMX_StoreDiff4P mm4, mm0, mm6, mm7, [edx + ecx]
    lea         edx, [edx + 2*ecx]          ; advance two rows
    MMX_StoreDiff4P mm1, mm0, mm6, mm7, [edx]
    MMX_StoreDiff4P mm2, mm0, mm6, mm7, [edx + ecx]

    emms                                    ; leave MMX state clean for x87 code in the caller
    ret

%undef  pushsize
%undef  pPred
%undef  kiStride
%undef  pRs