ref: 3811665793d30d62523855f87112d1e267996d53
parent: 19b4c9c076c5542901871be2b50d9084980cffce
author: Victorien Le Couviour--Tuffet <victorien.lecouviour.tuffet@gmail.com>
date: Sun Feb 24 10:10:02 EST 2019
x86: optimize 4 by X cdef filters for HAVE_RIGHT=0
--- a/src/x86/cdef.asm
+++ b/src/x86/cdef.asm
@@ -156,15 +156,17 @@
.no_right:
%if %1 == 4
movd xm1, [dstq+strideq*0]
- movd xm2, [dstq+strideq*2]
- pinsrd xm1, [dstq+strideq*1], 1
- pinsrd xm2, [dstq+stride3q], 1
+ movd xm2, [dstq+strideq*1]
+ movd xm3, [dstq+strideq*2]
+ movd xm4, [dstq+stride3q]
pmovzxbw xm1, xm1
pmovzxbw xm2, xm2
+ pmovzxbw xm3, xm3
+ pmovzxbw xm4, xm4
movq [px+0*%3], xm1
- movhps [px+1*%3], xm1
- movq [px+2*%3], xm2
- movhps [px+3*%3], xm2
+ movq [px+1*%3], xm2
+ movq [px+2*%3], xm3
+ movq [px+3*%3], xm4
%else
pmovzxbw xm1, [dstq+strideq*0]
pmovzxbw xm2, [dstq+strideq*1]