ref: b83cb9643bc647f6ef633330b52cb68882338067
parent: 289ca2ceaef841babb7ec3a302e0c7c625a54899
author: Victorien Le Couviour--Tuffet <victorien@videolan.org>
date: Fri Jan 17 08:42:54 EST 2020
x86: replace "mov hb, Xb" by "movzx hd, Xb" in MC. It's a little easier for the CPU to simply overwrite a 32-bit reg rather than writing its low 8 bits while preserving bits 8 to 31. In order to do the latter, it actually fetches those bits, merges them into a 32-bit value, and writes that back to the 32-bit GPR. As bits 8 to 31 are always cleared here, perform a zero-extending mov to dword instead.
--- a/src/x86/mc.asm
+++ b/src/x86/mc.asm
@@ -566,7 +566,7 @@
lea t2d, [hq+(3<<8)]
.v_w128_loop:
PUT_BILIN_V_W32
- mov hb, t2b
+ movzx hd, t2b
add t0, 32
add t1, 32
mov dstq, t0
@@ -1492,7 +1492,7 @@
add tmpq, 32*16
sub hd, 2
jg .v_w128_loop
- mov hb, t2b
+ movzx hd, t2b
add t0, 64
add t1, 32
mov tmpq, t0
@@ -1758,7 +1758,7 @@
mova [tmpq-32*4], m2
sub hd, 2
jg .hv_w64_loop
- mov hb, t2b
+ movzx hd, t2b
add t0, 32
add t1, 16
mov tmpq, t0
@@ -1841,7 +1841,7 @@
add tmpq, 32*16
sub hd, 2
jg .hv_w128_loop
- mov hb, t2b
+ movzx hd, t2b
add t0, mmsize
add t1, mmsize/2
mov tmpq, t0
@@ -2247,7 +2247,7 @@
lea dstq, [dstq+dsq*2]
sub hd, 2
jg .v_w16_loop
- mov hb, r6b
+ movzx hd, r6b
add r4, 16
add r7, 16
mov dstq, r4
@@ -2516,7 +2516,7 @@
lea dstq, [dstq+dsq*2]
sub hd, 2
jg .hv_w8_loop
- mov hb, r6b
+ movzx hd, r6b
add r4, 8
add r7, 8
mov dstq, r4