shithub: openh264

Download patch

ref: a50d54f8e8cf80a5c0313847ef488e2eb42ce409
parent: bbe51935c780fe2747cb757e8f81635be119f112
parent: a6463be0cc7503db58da27aa8001b4b8ac26f646
author: Ethan Hugg <ethanhugg@gmail.com>
date: Sat Jan 18 03:55:49 EST 2014

Merge pull request #162 from Vproject/yasm

Allow yasm to be used instead of nasm.

--- a/codec/common/mb_copy.asm
+++ b/codec/common/mb_copy.asm
@@ -392,7 +392,7 @@
 	;mov eax, [esp+4]	; mv_buffer
 	;movd xmm0, [esp+8]	; _mv
 	movd xmm0, r1d	; _mv
-	pshufd xmm1, xmm0, $0
+	pshufd xmm1, xmm0, $00
 	movdqa [r0     ], xmm1
 	movdqa [r0+0x10], xmm1
 	movdqa [r0+0x20], xmm1
--- a/codec/common/vaa.asm
+++ b/codec/common/vaa.asm
@@ -101,7 +101,7 @@
 	punpcklwd %1, %2
 	punpckhwd %3, %4
 	punpcklwd %1, %3
-	psraw %1, $4
+	psraw %1, $04
 %endmacro
 
 %macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
@@ -129,7 +129,7 @@
 	paddw %2, %4	; block 2, 3
 	phaddw %1, %2	; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; ..
 	phaddw %1, xmm7	; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; ....
-	psraw %1, $4
+	psraw %1, $04
 %endmacro
 
 
@@ -178,12 +178,12 @@
 
 
     mov r2,r1
-    sal r2,$1   ;r2 = 2*iLineSize
+    sal r2,$01   ;r2 = 2*iLineSize
     mov r3,r2
     add r3,r1   ;r3 = 3*iLineSize
 
     mov r4,r2
-    sal r4,$1   ;r4 = 4*iLineSize
+    sal r4,$01   ;r4 = 4*iLineSize
 
 	pxor xmm7, xmm7
 
@@ -231,7 +231,7 @@
 	and r2, 0ffffh		; effective low work truncated
 	mov r3, r2
 	imul r2, r3
-	sar r2, $4
+	sar r2, $04
 	movd retrd, xmm1
 	sub retrd, r2d
 
@@ -273,12 +273,12 @@
 
 
     mov r2,r1
-    sal r2,$1   ;r2 = 2*iLineSize
+    sal r2,$01   ;r2 = 2*iLineSize
     mov r3,r2
     add r3,r1   ;r3 = 3*iLineSize
 
     mov r4,r2
-    sal r4,$1   ;r4 = 4*iLineSize
+    sal r4,$01   ;r4 = 4*iLineSize
 
 	pxor xmm7, xmm7
 
@@ -327,7 +327,7 @@
     and r2, 0ffffh          ; effective low work truncated
     mov r3, r2
     imul r2, r3
-    sar r2, $4
+    sar r2, $04
     movd retrd, xmm1
 	sub retrd, r2d
 
--- a/codec/encoder/core/asm/dct.asm
+++ b/codec/encoder/core/asm/dct.asm
@@ -86,17 +86,17 @@
 
 %macro MMX_SumSubMul2 3
 	movq    %3, %1
-	psllw   %1, $1
+	psllw   %1, $01
 	paddw   %1, %2
-	psllw   %2, $1
+	psllw   %2, $01
     psubw   %3, %2
 %endmacro
 
 %macro MMX_SumSubDiv2 3
     movq    %3, %2
-    psraw   %3, $1
+    psraw   %3, $01
     paddw   %3, %1
-    psraw   %1, $1
+    psraw   %1, $01
     psubw   %1, %2
 %endmacro
 
@@ -124,7 +124,7 @@
     movd       %2, %6
     punpcklbw  %2, %4
     paddw      %1, %3
-    psraw      %1, $6
+    psraw      %1, $06
     paddsw     %1, %2
     packuswb   %1, %2
     movd       %5, %1
@@ -255,8 +255,8 @@
 %macro SSE2_SumSubDiv2 4
     movdqa  %4, %1
     movdqa  %3, %2
-    psraw   %2, $1
-    psraw   %4, $1
+    psraw   %2, $01
+    psraw   %4, $01
     paddw   %1, %2
     psubw   %4, %3
 %endmacro
@@ -263,7 +263,7 @@
 
 %macro SSE2_StoreDiff8p 6
     paddw       %1, %3
-    psraw       %1, $6
+    psraw       %1, $06
     movq		%2, %6
     punpcklbw   %2, %4
     paddsw      %2, %1
@@ -282,7 +282,7 @@
 %macro SSE2_Load8DC	6
 	movdqa		%1,		%6		; %1 = dc0 dc1
 	paddw       %1,		%5
-    psraw       %1,		$6		; (dc + 32) >> 6
+    psraw       %1,		$06		; (dc + 32) >> 6
 
     movdqa		%2,		%1
     psrldq		%2,		4
--- a/codec/processing/src/asm/downsample_bilinear.asm
+++ b/codec/processing/src/asm/downsample_bilinear.asm
@@ -94,13 +94,13 @@
 	mov ecx, [esp+36]	; iSrcStride
 	mov ebp, [esp+44]	; iSrcHeight
 
-	sar ebp, $1			; iSrcHeight >> 1
+	sar ebp, $01			; iSrcHeight >> 1
 
 .yloops:
 	mov eax, [esp+40]	; iSrcWidth
-	sar eax, $1			; iSrcWidth >> 1
+	sar eax, $01			; iSrcWidth >> 1
 	mov ebx, eax		; iDstWidth restored at ebx
-	sar eax, $4			; (iSrcWidth >> 1) / 16		; loop count = num_of_mb
+	sar eax, $04			; (iSrcWidth >> 1) / 16		; loop count = num_of_mb
 	neg ebx				; - (iSrcWidth >> 1)
 	; each loop = source bandwidth: 32 bytes
 .xloops:
@@ -247,13 +247,13 @@
 	mov ecx, [esp+36]	; iSrcStride
 	mov ebp, [esp+44]	; iSrcHeight
 
-	sar ebp, $1		; iSrcHeight >> 1
+	sar ebp, $01		; iSrcHeight >> 1
 
 .yloops:
 	mov eax, [esp+40]	; iSrcWidth
-	sar eax, $1		; iSrcWidth >> 1
+	sar eax, $01		; iSrcWidth >> 1
 	mov ebx, eax		; iDstWidth restored at ebx
-	sar eax, $3		; (iSrcWidth >> 1) / 8		; loop count = num_of_mb
+	sar eax, $03		; (iSrcWidth >> 1) / 8		; loop count = num_of_mb
 	neg ebx			; - (iSrcWidth >> 1)
 	; each loop = source bandwidth: 16 bytes
 .xloops:
@@ -351,13 +351,13 @@
 	mov ecx, [esp+36]	; iSrcStride
 	mov ebp, [esp+44]	; iSrcHeight
 
-	sar ebp, $1		; iSrcHeight >> 1
+	sar ebp, $01		; iSrcHeight >> 1
 
 .yloops:
 	mov eax, [esp+40]	; iSrcWidth
-	sar eax, $1		; iSrcWidth >> 1
+	sar eax, $01		; iSrcWidth >> 1
 	mov ebx, eax		; iDstWidth restored at ebx
-	sar eax, $2		; (iSrcWidth >> 1) / 4		; loop count = num_of_mb
+	sar eax, $02		; (iSrcWidth >> 1) / 4		; loop count = num_of_mb
 	neg ebx			; - (iSrcWidth >> 1)
 	; each loop = source bandwidth: 8 bytes
 .xloops:
@@ -442,7 +442,7 @@
 	mov ecx, [esp+36]	; iSrcStride
 	mov ebp, [esp+44]	; iSrcHeight
 
-	sar ebp, $1			; iSrcHeight >> 1
+	sar ebp, $01			; iSrcHeight >> 1
 
 	movdqa xmm7, [shufb_mask_low]	; mask low
 	movdqa xmm6, [shufb_mask_high]	; mask high
@@ -449,9 +449,9 @@
 
 .yloops:
 	mov eax, [esp+40]	; iSrcWidth
-	sar eax, $1			; iSrcWidth >> 1
+	sar eax, $01			; iSrcWidth >> 1
 	mov ebx, eax		; iDstWidth restored at ebx
-	sar eax, $4			; (iSrcWidth >> 1) / 16		; loop count = num_of_mb
+	sar eax, $04			; (iSrcWidth >> 1) / 16		; loop count = num_of_mb
 	neg ebx				; - (iSrcWidth >> 1)
 	; each loop = source bandwidth: 32 bytes
 .xloops:
@@ -553,15 +553,15 @@
 	mov ecx, [esp+36]	; iSrcStride
 	mov ebp, [esp+44]	; iSrcHeight
 
-	sar ebp, $1		; iSrcHeight >> 1
+	sar ebp, $01		; iSrcHeight >> 1
 	movdqa xmm7, [shufb_mask_low]	; mask low
 	movdqa xmm6, [shufb_mask_high]	; mask high
 
 .yloops:
 	mov eax, [esp+40]	; iSrcWidth
-	sar eax, $1		; iSrcWidth >> 1
+	sar eax, $01		; iSrcWidth >> 1
 	mov ebx, eax		; iDstWidth restored at ebx
-	sar eax, $3		; (iSrcWidth >> 1) / 8		; loop count = num_of_mb
+	sar eax, $03		; (iSrcWidth >> 1) / 8		; loop count = num_of_mb
 	neg ebx			; - (iSrcWidth >> 1)
 	; each loop = source bandwidth: 16 bytes
 .xloops:
@@ -643,7 +643,7 @@
 	mov ecx, [esp+36]	; iSrcStride
 	mov ebp, [esp+44]	; iSrcHeight
 
-	sar ebp, $1			; iSrcHeight >> 1
+	sar ebp, $01			; iSrcHeight >> 1
 
 	movdqa xmm7, [shufb_mask_low]	; mask low
 	movdqa xmm6, [shufb_mask_high]	; mask high
@@ -650,9 +650,9 @@
 
 .yloops:
 	mov eax, [esp+40]	; iSrcWidth
-	sar eax, $1			; iSrcWidth >> 1
+	sar eax, $01			; iSrcWidth >> 1
 	mov ebx, eax		; iDstWidth restored at ebx
-	sar eax, $4			; (iSrcWidth >> 1) / 16		; loop count = num_of_mb
+	sar eax, $04			; (iSrcWidth >> 1) / 16		; loop count = num_of_mb
 	neg ebx				; - (iSrcWidth >> 1)
 	; each loop = source bandwidth: 32 bytes
 .xloops:
@@ -753,15 +753,15 @@
 	mov ecx, [esp+36]	; iSrcStride
 	mov ebp, [esp+44]	; iSrcHeight
 
-	sar ebp, $1		; iSrcHeight >> 1
+	sar ebp, $01		; iSrcHeight >> 1
 	movdqa xmm7, [shufb_mask_low]	; mask low
 	movdqa xmm6, [shufb_mask_high]	; mask high
 
 .yloops:
 	mov eax, [esp+40]	; iSrcWidth
-	sar eax, $1		; iSrcWidth >> 1
+	sar eax, $01		; iSrcWidth >> 1
 	mov ebx, eax		; iDstWidth restored at ebx
-	sar eax, $3		; (iSrcWidth >> 1) / 8		; loop count = num_of_mb
+	sar eax, $03		; (iSrcWidth >> 1) / 8		; loop count = num_of_mb
 	neg ebx			; - (iSrcWidth >> 1)
 	; each loop = source bandwidth: 16 bytes
 .xloops:
--- a/codec/processing/src/asm/vaa.asm
+++ b/codec/processing/src/asm/vaa.asm
@@ -121,7 +121,7 @@
 	punpcklwd %1, %2
 	punpckhwd %3, %4
 	punpcklwd %1, %3
-	psraw %1, $4
+	psraw %1, $04
 %endmacro
 
 %macro VAA_AVG_BLOCK_SSSE3 6 ; dst, t0, t1, t2, t3, t4
@@ -149,7 +149,7 @@
 	paddw %2, %4	; block 2, 3
 	phaddw %1, %2	; block[0]: 0-15, 16-31; block[1]: 32-47, 48-63; ..
 	phaddw %1, xmm7	; block[0]: 0-15; block[1]: 16-31; block[2]: 32-47; block[3]: 48-63; ....
-	psraw %1, $4
+	psraw %1, $04
 %endmacro
 
 %macro WELS_SAD_16x2_SSE2  0
@@ -353,7 +353,7 @@
 	pxor xmm0, xmm0
 .hloop:
 	mov eax, ebx
-	mov ebp, $0
+	mov ebp, $00
 .wloop:
 	movdqa xmm1, [esi+ebp]
 	movdqa xmm2, [edi+ebp]