ref: 3f2ea77908b70e382b967b7af4d49658b12e0bb2
parent: 883e89e0d2804b8620456fb09e62bb5d4c6dafd5
parent: a4f59bc0d7505b35535e002be1cdcebeb3622207
author: Licai Guo <licaguo@cisco.com>
date: Mon Apr 21 10:38:51 EDT 2014
Merge pull request #719 from dongzha/MC: Modify the ARM32 NEON code for Expand Chroma Picture to handle the case UVWidth % 16 == 8.
--- a/codec/common/arm/expand_picture_neon.S
+++ b/codec/common/arm/expand_picture_neon.S
@@ -87,7 +87,7 @@
WELS_ASM_FUNC_BEGIN ExpandPictureChroma_neon
- stmdb sp!, {r4-r8}
+ stmdb sp!, {r4-r9}
//Save the dst
mov r7, r0
mov r8, r3
@@ -109,12 +109,14 @@
//for the top and bottom expand
add r2, #32
+ mov r9, r2
+ bic r2, #15
sub r0, #16
mla r4, r1, r3, r0
sub r4, r1
_expand_picture_chroma_loop0:
mov r5, #16
- mls r5, r5, r1, r0
+ mls r5, r5, r1, r0
add r6, r4, r1
vld1.8 {q0}, [r0]!
vld1.8 {q1}, [r4]!
@@ -124,7 +126,7 @@
vst1.8 {q0}, [r5], r1
vst1.8 {q1}, [r6], r1
subs r8, #1
- bne _expand_picture_chroma_loop1
+ bne _expand_picture_chroma_loop1
subs r2, #16
bne _expand_picture_chroma_loop0
@@ -131,7 +133,23 @@
//vldreq.32 d0, [r0]
- ldmia sp!, {r4-r8}
+ and r9, #15
+ cmp r9, #8
+ bne _expand_picture_chroma_end
+ mov r5, #16
+ mls r5, r5, r1, r0
+ add r6, r4, r1
+ vld1.8 {d0}, [r0]!
+ vld1.8 {d2}, [r4]!
+ mov r8, #16
+_expand_picture_chroma_loop3:
+ vst1.8 {d0}, [r5], r1
+ vst1.8 {d2}, [r6], r1
+ subs r8, #1
+ bne _expand_picture_chroma_loop3
+_expand_picture_chroma_end:
+
+ ldmia sp!, {r4-r9}
WELS_ASM_FUNC_END
#endif
--- a/codec/encoder/core/src/expand_pic.cpp
+++ b/codec/encoder/core/src/expand_pic.cpp
@@ -132,7 +132,7 @@
#if defined(HAVE_NEON)
if (kuiCPUFlag & WELS_CPU_NEON) {
pFuncList->pfExpandLumaPicture = ExpandPictureLuma_neon;
- pFuncList->pfExpandChromaPicture[0] = ExpandPictureChroma_c;
+ pFuncList->pfExpandChromaPicture[0] = ExpandPictureChroma_neon;
pFuncList->pfExpandChromaPicture[1] = ExpandPictureChroma_neon;
}
#endif//HAVE_NEON