ref: eb238e654972292004294b2cc49093bdebb759d0
parent: 1449c83f57ad535cadc79a4f81eab95592c5858e
author: Martin Storsjö <martin@martin.st>
date: Fri Mar 14 10:32:41 EDT 2014
Use the SIGN_EXTENSION macro where possible. This shortens the x86 assembly by 134 lines in total.
--- a/codec/common/mb_copy.asm
+++ b/codec/common/mb_copy.asm
@@ -442,12 +442,10 @@
%assign push_num 0
LOAD_7_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r5, r5d
- movsx r6, r6d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r5, r5d
+ SIGN_EXTENSION r6, r6d
ALIGN 4
.height_loop:
@@ -491,12 +489,10 @@
%assign push_num 0
LOAD_7_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r5, r5d
- movsx r6, r6d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r5, r5d
+ SIGN_EXTENSION r6, r6d
ALIGN 4
.height_loop:
@@ -531,12 +527,10 @@
%assign push_num 0
LOAD_7_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r5, r5d
- movsx r6, r6d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r5, r5d
+ SIGN_EXTENSION r6, r6d
ALIGN 4
.height_loop:
movdqu xmm0, [r2]
@@ -596,11 +590,9 @@
%assign push_num 1
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
ALIGN 4
.height_loop:
@@ -633,11 +625,9 @@
%assign push_num 0
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
ALIGN 4
.height_loop:
@@ -680,11 +670,9 @@
%assign push_num 0
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
ALIGN 4
.height_loop:
SSE_READ_UNA xmm0, r0
--- a/codec/common/mc_chroma.asm
+++ b/codec/common/mc_chroma.asm
@@ -82,11 +82,9 @@
%assign push_num 0
LOAD_6_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r5, r5d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r5, r5d
;mov eax, [esp +12 + 20]
@@ -174,11 +172,9 @@
%assign push_num 0
LOAD_6_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r5, r5d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r5, r5d
;mov eax, [esp +12 + 20]
movd xmm3, [r4]
@@ -268,11 +264,9 @@
;push edi
%assign push_num 0
LOAD_6_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r5, r5d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r5, r5d
;mov eax, [esp + 12 + 20]
--- a/codec/common/mc_luma.asm
+++ b/codec/common/mc_luma.asm
@@ -94,11 +94,9 @@
%assign push_num 0
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
sub r0, 2
WELS_Zero mm7
@@ -198,11 +196,9 @@
%assign push_num 0
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
pxor xmm7, xmm7
sub r0, r1 ;;;;;;;;need more 5 lines.
@@ -260,11 +256,9 @@
%assign push_num 0
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
lea r0, [r0-2] ;pSrc -= 2;
pxor xmm7, xmm7
@@ -325,11 +319,9 @@
%assign push_num 0
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
lea r0, [r0-2] ;pSrc -= 2;
pxor xmm7, xmm7
@@ -416,11 +408,9 @@
%assign push_num 0
LOAD_5_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
sub r0, r1
sub r0, r1
@@ -526,12 +516,10 @@
%assign push_num 0
LOAD_6_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
- movsx r5, r5d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
+ SIGN_EXTENSION r5, r5d
%ifndef X86_32
push r12
@@ -675,12 +663,10 @@
%assign push_num 0
LOAD_6_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
- movsx r5, r5d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
+ SIGN_EXTENSION r5, r5d
sub r0, 2
pxor xmm7, xmm7
@@ -845,12 +831,10 @@
%assign push_num 0
LOAD_6_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
- movsx r5, r5d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
+ SIGN_EXTENSION r5, r5d
pxor xmm7, xmm7
sub r0, r1 ;;;;;;;;need more 5 lines.
sub r0, r1
@@ -1026,13 +1010,11 @@
%assign push_num 0
LOAD_6_PARA
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
+ SIGN_EXTENSION r5, r5d
%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
- movsx r5, r5d
-%endif
-%ifndef X86_32
push r12
push r13
push r14
@@ -1172,12 +1154,10 @@
%assign push_num 0
LOAD_6_PARA
-%ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- movsx r4, r4d
- movsx r5, r5d
-%endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
+ SIGN_EXTENSION r4, r4d
+ SIGN_EXTENSION r5, r5d
%ifndef X86_32
push r12
push r13
--- a/codec/decoder/core/asm/block_add.asm
+++ b/codec/decoder/core/asm/block_add.asm
@@ -59,9 +59,7 @@
;push r0
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
;mov r0, [esp+08h]
;mov r1, [esp+0ch]
;lea r1, [r1*2]
@@ -140,9 +138,7 @@
;push r0
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
;mov r0, [esp+08h]
;mov r1, [esp+0ch]
lea r1, [r1*2]
--- a/codec/decoder/core/asm/dct.asm
+++ b/codec/decoder/core/asm/dct.asm
@@ -93,9 +93,7 @@
IdctResAddPred_mmx:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
movq mm0, [r2+ 0]
movq mm1, [r2+ 8]
movq mm2, [r2+16]
--- a/codec/decoder/core/asm/intra_pred.asm
+++ b/codec/decoder/core/asm/intra_pred.asm
@@ -191,9 +191,7 @@
WelsDecoderI4x4LumaPredH_sse2:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
;mov eax, [esp+4] ;pPred
;mov ecx, [esp+8] ;kiStride
@@ -232,9 +230,7 @@
push r4
%assign push_num 2
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r4, r0 ; save r0 in r4
;push esi
;mov esi, [esp + pushsize + 4]
@@ -341,9 +337,7 @@
WelsDecoderI16x16LumaPredH_sse2:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
;mov eax, [esp+4] ; pPred
;mov ecx, [esp+8] ; kiStride
@@ -369,9 +363,7 @@
WelsDecoderI16x16LumaPredV_sse2:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
;mov edx, [esp+4] ; pPred
;mov ecx, [esp+8] ; kiStride
@@ -415,9 +407,7 @@
push r4
%assign push_num 2
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r4, r0
;push esi
;mov esi, [esp + pushsize + 4] ;pPred
@@ -521,9 +511,7 @@
WelsDecoderI4x4LumaPredDDR_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx,[esp+4] ;pPred
;mov eax,edx
@@ -596,9 +584,7 @@
WelsDecoderIChromaPredH_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ;pPred
;mov eax, edx
@@ -644,9 +630,7 @@
WelsDecoderIChromaPredV_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
;mov eax, [esp+4] ;pPred
;mov ecx, [esp+8] ;kiStride
@@ -703,9 +687,7 @@
WelsDecoderI4x4LumaPredHD_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
@@ -792,9 +774,7 @@
WelsDecoderI4x4LumaPredHU_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
@@ -884,9 +864,7 @@
WelsDecoderI4x4LumaPredVR_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
@@ -977,9 +955,7 @@
WelsDecoderI4x4LumaPredDDL_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
@@ -1052,9 +1028,7 @@
WelsDecoderI4x4LumaPredVL_mmx:
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
;mov edx, [esp+4] ; pPred
;mov eax, edx
@@ -1101,9 +1075,7 @@
push r4
%assign push_num 2
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r4, r0
;push ebx
;mov eax, [esp+8] ; pPred
@@ -1209,9 +1181,7 @@
push r4
%assign push_num 2
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r4, r0
sub r0, r1
movdqa xmm0, [r0] ; read one row
@@ -1296,9 +1266,7 @@
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
sub r2, r1
movdqa xmm0, [r2] ; pPred-kiStride, top line
@@ -1378,9 +1346,7 @@
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
lea r2, [2*r1+r1] ; 3*kiStride
movdqa xmm0, [sse2_dc_0x80]
@@ -1426,9 +1392,7 @@
push r4
%assign push_num 2
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r4, r0
; for left
dec r0
@@ -1507,9 +1471,7 @@
;neg ebx
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
mov r2, r0
sub r2, r1
movq xmm0, [r2] ; top: 8x1 pixels
@@ -1554,9 +1516,7 @@
;mov ebx, [esp+PUSH_SIZE+8] ; kiStride
%assign push_num 0
LOAD_2_PARA
- %ifndef X86_32
- movsx r1, r1d
- %endif
+ SIGN_EXTENSION r1, r1d
lea r2, [2*r1+r1]
movq mm0, [sse2_dc_0x80]
movq mm1, mm0
--- a/codec/encoder/core/asm/dct.asm
+++ b/codec/encoder/core/asm/dct.asm
@@ -143,10 +143,8 @@
;mov edx, [esp+24] ; i_pix2
%assign push_num 0
LOAD_5_PARA
- %ifndef X86_32
- movsx r2, r2d
- movsx r4, r4d
- %endif
+ SIGN_EXTENSION r2, r2d
+ SIGN_EXTENSION r4, r4d
WELS_Zero mm7
MMX_LoadDiff4x4P mm1, mm2, mm3, mm4, r1, r2, r3, r4, mm0, mm7
@@ -182,10 +180,8 @@
;%define pDct esp+pushsize+20
%assign push_num 0
LOAD_5_PARA
- %ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- %endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
; mov eax, [pDct ]
movq mm0, [r4+ 0]
movq mm1, [r4+ 8]
@@ -332,10 +328,8 @@
;mov edx, [esp+28] ; i_pix2
%assign push_num 0
LOAD_5_PARA
- %ifndef X86_32
- movsx r2, r2d
- movsx r4, r4d
- %endif
+ SIGN_EXTENSION r2, r2d
+ SIGN_EXTENSION r4, r4d
pxor xmm7, xmm7
;Load 4x8
SSE2_LoadDiff8P xmm0, xmm6, xmm7, [r1], [r3]
@@ -399,10 +393,8 @@
; mov esi, [rs]
%assign push_num 0
LOAD_5_PARA
- %ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- %endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
;Load 4x8
SSE2_Load4x8p r4, xmm0, xmm1, xmm4, xmm2, xmm5
@@ -462,10 +454,8 @@
WelsIDctRecI16x16Dc_sse2:
%assign push_num 0
LOAD_5_PARA
- %ifndef X86_32
- movsx r1, r1d
- movsx r3, r3d
- %endif
+ SIGN_EXTENSION r1, r1d
+ SIGN_EXTENSION r3, r3d
; push esi
; push edi
--- a/codec/encoder/core/asm/intra_pred.asm
+++ b/codec/encoder/core/asm/intra_pred.asm
@@ -203,9 +203,7 @@
push r3
%assign push_num 1
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
movzx r3, byte [r1-1]
movd xmm0, r3d
pmuludq xmm0, [mmx_01bytes]
@@ -244,9 +242,7 @@
push r4
%assign push_num 2
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, 1
sub r1, r2
@@ -343,9 +339,7 @@
push r3
%assign push_num 1
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
dec r1
movzx r3, byte [r1]
SSE2_Copy16Times xmm0, r3d
@@ -378,9 +372,7 @@
;mov ecx, [esp+12] ; stride
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movdqa xmm0, [r1]
@@ -416,9 +408,7 @@
push r4
%assign push_num 2
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, 1
sub r1, r2
@@ -517,9 +507,7 @@
;mov ecx,[esp+12] ;stride
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
movq mm1,[r1+r2-8] ;get value of 11,decreasing 8 is trying to improve the performance of movq mm1[8] = 11
movq mm2,[r1-8] ;get value of 6 mm2[8] = 6
sub r1, r2 ;mov eax to above line of current block(postion of 1)
@@ -575,9 +563,7 @@
push r4
%assign push_num 2
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
movzx r4, byte [r1-1h]
sub r1, r2
movd xmm0, [r1]
@@ -638,9 +624,7 @@
;mov ecx, [esp+12] ;stride
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
movq mm0, [r1-8]
psrlq mm0, 38h
@@ -677,9 +661,7 @@
WelsI4x4LumaPredV_sse2:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movd xmm0, [r1]
pshufd xmm0, xmm0, 0
@@ -695,9 +677,7 @@
WelsIChromaPredV_sse2:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movq xmm0, [r1]
movdqa xmm1, xmm0
@@ -742,9 +722,7 @@
WelsI4x4LumaPredHD_mmx:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movd mm0, [r1-1] ; mm0 = [xx xx xx xx t2 t1 t0 lt]
psllq mm0, 20h ; mm0 = [t2 t1 t0 lt xx xx xx xx]
@@ -823,9 +801,7 @@
WelsI4x4LumaPredHU_mmx:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
movd mm0, [r1-4] ; mm0[3] = l0
punpcklbw mm0, [r1+r2-4] ; mm0[7] = l1, mm0[6] = l0
lea r1, [r1+2*r2]
@@ -908,9 +884,7 @@
WelsI4x4LumaPredVR_mmx:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movq mm0, [r1-1] ; mm0 = [xx xx xx t3 t2 t1 t0 lt]
psllq mm0, 18h ; mm0 = [t3 t2 t1 t0 lt xx xx xx]
@@ -996,9 +970,7 @@
WelsI4x4LumaPredDDL_mmx:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movq mm0, [r1] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
movq mm1, mm0
@@ -1066,9 +1038,7 @@
WelsI4x4LumaPredVL_mmx:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movq mm0, [r1] ; mm0 = [t7 t6 t5 t4 t3 t2 t1 t0]
movq mm1, mm0
@@ -1109,9 +1079,7 @@
push r4
%assign push_num 2
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movq mm0, [r1]
@@ -1203,9 +1171,7 @@
push r4
%assign push_num 2
LOAD_3_PARA
- %ifndef X86_32
- movsx r2, r2d
- %endif
+ SIGN_EXTENSION r2, r2d
sub r1, r2
movdqa xmm0, [r1] ; read one row
pxor xmm1, xmm1
--- a/codec/encoder/core/asm/quant.asm
+++ b/codec/encoder/core/asm/quant.asm
@@ -106,10 +106,8 @@
WelsQuant4x4Dc_sse2:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r1, r1w
- movsx r2, r2w
- %endif
+ SIGN_EXTENSION r1, r1w
+ SIGN_EXTENSION r2, r2w
;mov ax, [mf]
SSE2_Copy8Times xmm3, r2d
@@ -216,10 +214,8 @@
WelsHadamardQuant2x2_mmx:
%assign push_num 0
LOAD_5_PARA
- %ifndef X86_32
- movsx r1, r1w
- movsx r2, r2w
- %endif
+ SIGN_EXTENSION r1, r1w
+ SIGN_EXTENSION r2, r2w
;mov eax, [pDct]
movd mm0, [r0]
movd mm1, [r0 + 0x20]
@@ -281,10 +277,8 @@
WelsHadamardQuant2x2Skip_mmx:
%assign push_num 0
LOAD_3_PARA
- %ifndef X86_32
- movsx r1, r1w
- movsx r2, r2w
- %endif
+ SIGN_EXTENSION r1, r1w
+ SIGN_EXTENSION r2, r2w
;mov eax, [pDct]
movd mm0, [r0]
movd mm1, [r0 + 0x20]