ref: bdbf8725243c7eec4c8d0c59310a94bc002c81e2
dir: /vp8/common/x86/recon_sse2.asm/
; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void copy_mem16x16_sse2( ; unsigned char *src, ; int src_stride, ; unsigned char *dst, ; int dst_stride ; ) globalsym(vp8_copy_mem16x16_sse2) sym(vp8_copy_mem16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 push rsi push rdi ; end prolog mov rsi, arg(0) ;src; movdqu xmm0, [rsi] movsxd rax, dword ptr arg(1) ;src_stride; mov rdi, arg(2) ;dst; movdqu xmm1, [rsi+rax] movdqu xmm2, [rsi+rax*2] movsxd rcx, dword ptr arg(3) ;dst_stride lea rsi, [rsi+rax*2] movdqa [rdi], xmm0 add rsi, rax movdqa [rdi+rcx], xmm1 movdqa [rdi+rcx*2],xmm2 lea rdi, [rdi+rcx*2] movdqu xmm3, [rsi] add rdi, rcx movdqu xmm4, [rsi+rax] movdqu xmm5, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm3 add rsi, rax movdqa [rdi+rcx], xmm4 movdqa [rdi+rcx*2],xmm5 lea rdi, [rdi+rcx*2] movdqu xmm0, [rsi] add rdi, rcx movdqu xmm1, [rsi+rax] movdqu xmm2, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm0 add rsi, rax movdqa [rdi+rcx], xmm1 movdqa [rdi+rcx*2], xmm2 movdqu xmm3, [rsi] movdqu xmm4, [rsi+rax] lea rdi, [rdi+rcx*2] add rdi, rcx movdqu xmm5, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm3 add rsi, rax movdqa [rdi+rcx], xmm4 movdqa [rdi+rcx*2],xmm5 movdqu xmm0, [rsi] lea rdi, [rdi+rcx*2] movdqu xmm1, [rsi+rax] add rdi, rcx movdqu xmm2, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm0 movdqa [rdi+rcx], xmm1 movdqa [rdi+rcx*2],xmm2 movdqu xmm3, [rsi+rax] lea rdi, [rdi+rcx*2] movdqa [rdi+rcx], xmm3 ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret