ref: 248f324c62bae5b2e2dd202e8e251c670de8af5a
parent: efcee6369231e94933af44bcff3009da1c53c25d
author: Licai Guo <guolicai@gmail.com>
date: Wed Mar 5 05:04:06 EST 2014
Add intra-predictor ARM NEON asm code.
--- a/codec/decoder/core/inc/get_intra_predictor.h
+++ b/codec/decoder/core/inc/get_intra_predictor.h
@@ -124,7 +124,7 @@
void WelsDecoderIChromaPredV_neon(uint8_t *pPred, const int32_t kiStride);
void WelsDecoderIChromaPredH_neon(uint8_t *pPred, const int32_t kiStride);
-void WelsDecoderIChromaPredDC_neon(uint8_t *pPred, const int32_t kiStride);
+void WelsDecoderIChromaPredDc_neon(uint8_t *pPred, const int32_t kiStride);
void WelsDecoderIChromaPredPlane_neon(uint8_t *pPred, const int32_t kiStride);
#endif//HAVE_NEON
--- a/codec/decoder/core/src/decoder.cpp
+++ b/codec/decoder/core/src/decoder.cpp
@@ -675,7 +675,7 @@
pCtx->pGetIChromaPredFunc[C_PRED_H] = WelsDecoderIChromaPredH_neon;
pCtx->pGetIChromaPredFunc[C_PRED_V] = WelsDecoderIChromaPredV_neon;
pCtx->pGetIChromaPredFunc[C_PRED_P ] = WelsDecoderIChromaPredPlane_neon;
- pCtx->pGetIChromaPredFunc[C_PRED_DC] = WelsDecoderIChromaPredDC_neon;
+ pCtx->pGetIChromaPredFunc[C_PRED_DC] = WelsDecoderIChromaPredDc_neon;
}
#endif//HAVE_NEON
--- a/codec/encoder/core/arm/intra_pred_neon.S
+++ b/codec/encoder/core/arm/intra_pred_neon.S
@@ -62,7 +62,7 @@
#endif
-WELS_ASM_FUNC_BEGIN enc_get_i16x16_luma_pred_v_neon
+WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredV_neon
//Get the top line data to 'q0'
sub r3, r1, r2
vldm r3, {d0, d1}
@@ -80,7 +80,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i16x16_luma_pred_h_neon
+WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredH_neon
//stmdb sp!, {r4, lr}
sub r1, r1, #1
mov r3, #4
@@ -107,7 +107,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i16x16_luma_pred_dc_both_neon
+WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredDc_neon
//stmdb sp!, { r2-r5, lr}
//Get the left vertical line data
sub r3, r1, #1
@@ -150,7 +150,7 @@
CONST1_GET_I16X16_LUMA_PRED_PLANE: .long 0xfcfbfaf9, 0x00fffefd
-WELS_ASM_FUNC_BEGIN enc_get_i16x16_luma_pred_plane_neon
+WELS_ASM_FUNC_BEGIN WelsI16x16LumaPredPlane_neon
//stmdb sp!, { r4, lr}
//Load the table {(8,7,6,5,4,3,2,1) * 5}
@@ -241,7 +241,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_v_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredV_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (4 bytes)
sub r3, r1, r2
@@ -256,7 +256,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_h_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredH_neon
//stmdb sp!, { r2-r5, lr}
//Load the left column (4 bytes)
sub r3, r1, #1
@@ -274,7 +274,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_d_l_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredDDL_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row data(8 bytes)
sub r3, r1, r2
@@ -312,7 +312,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_d_r_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredDDR_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (4 bytes)
sub r3, r1, r2
@@ -352,7 +352,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_v_l_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredVL_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (8 bytes)
sub r3, r1, r2
@@ -382,7 +382,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_v_r_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredVR_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row (4 bytes)
sub r3, r1, r2
@@ -419,7 +419,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_h_u_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredHU_neon
//stmdb sp!, { r4, lr}
//Load the left column data
sub r3, r1, #1
@@ -452,7 +452,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i4x4_luma_pred_h_d_neon
+WELS_ASM_FUNC_BEGIN WelsI4x4LumaPredHD_neon
//stmdb sp!, { r2-r5, lr}
//Load the data
sub r3, r1, r2
@@ -487,7 +487,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i_chroma_pred_v_neon
+WELS_ASM_FUNC_BEGIN WelsIChromaPredV_neon
//stmdb sp!, { r2-r5, lr}
//Get the top row (8 byte)
sub r3, r1, r2
@@ -506,7 +506,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i_chroma_pred_h_neon
+WELS_ASM_FUNC_BEGIN WelsIChromaPredH_neon
//stmdb sp!, { r2-r5, lr}
////Get the left column (8 byte)
sub r3, r1, #1
@@ -532,7 +532,7 @@
WELS_ASM_FUNC_END
-WELS_ASM_FUNC_BEGIN enc_get_i_chroma_pred_dc_both_neon
+WELS_ASM_FUNC_BEGIN WelsIChromaPredDc_neon
//stmdb sp!, { r2-r5, lr}
//Load the left column data (8 bytes)
sub r3, r1, #1
@@ -574,7 +574,7 @@
//Table {-3,-2,-1,0,1,2,3,4}
CONST1_GET_I_CHROMA_PRED_PLANE: .long 0xfffefffd, 0x0000ffff,0x00020001,0x00040003
-WELS_ASM_FUNC_BEGIN enc_get_i_chroma_pred_plane_neon
+WELS_ASM_FUNC_BEGIN WelsIChromaPredPlane_neon
//stmdb sp!, { r2-r5, lr}
//Load the top row data
sub r3, r1, #1
--- a/codec/encoder/core/inc/get_intra_predictor.h
+++ b/codec/encoder/core/inc/get_intra_predictor.h
@@ -115,6 +115,26 @@
void WelsI4x4LumaPredHU_mmx (uint8_t* pPred, uint8_t* pRef, const int32_t kiStride);
#endif//X86_ASM
+#if defined(HAVE_NEON)
+void WelsI16x16LumaPredV_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredH_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredDc_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI16x16LumaPredPlane_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+
+void WelsI4x4LumaPredV_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI4x4LumaPredH_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI4x4LumaPredDDL_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI4x4LumaPredDDR_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI4x4LumaPredVL_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI4x4LumaPredVR_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI4x4LumaPredHU_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsI4x4LumaPredHD_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+
+void WelsIChromaPredV_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsIChromaPredH_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsIChromaPredDc_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+void WelsIChromaPredPlane_neon(uint8_t *pPred, uint8_t* pRef, const int32_t kiStride);
+#endif//HAVE_NEON
#if defined(__cplusplus)
}
#endif//__cplusplus
--- a/codec/encoder/core/src/encoder_ext.cpp
+++ b/codec/encoder/core/src/encoder_ext.cpp
@@ -1944,13 +1944,6 @@
else if (uiCpuFeatureFlags & WELS_CPU_CACHELINE_16)
iCacheLineSize = 16;
OutputCpuFeaturesLog (uiCpuFeatureFlags, uiCpuCores, iCacheLineSize);
-#elif defined(HAVE_NEON)
-#if defined(ANDROID_NDK)
- uiCpuFeatureFlags = WelsCPUFeatureDetectAndroid();
-#endif
-#if defined(APPLE_IOS)
- uiCpuFeatureFlags = WelsCPUFeatureDetectIOS();
-#endif
#else
iCacheLineSize = 16; // 16 bytes aligned in default
#endif//X86_ASM
--- a/codec/encoder/core/src/get_intra_predictor.cpp
+++ b/codec/encoder/core/src/get_intra_predictor.cpp
@@ -709,6 +709,29 @@
pFuncList->pfGetChromaPred[C_PRED_DC_L] = WelsIChormaPredDcLeft_c;
pFuncList->pfGetChromaPred[C_PRED_DC_T] = WelsIChormaPredDcTop_c;
pFuncList->pfGetChromaPred[C_PRED_DC_128] = WelsIChormaPredDcNA_c;
+#ifdef HAVE_NEON
+ if (kuiCpuFlag & WELS_CPU_NEON) {
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDR] = WelsI4x4LumaPredDDR_neon;
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_HD] = WelsI4x4LumaPredHD_neon;
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_HU] = WelsI4x4LumaPredHU_neon;
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_VR] = WelsI4x4LumaPredVR_neon;
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDL] = WelsI4x4LumaPredDDL_neon;
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_VL] = WelsI4x4LumaPredVL_neon;
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_H] = WelsI4x4LumaPredH_neon;
+ pFuncList->pfGetLumaI4x4Pred[I4_PRED_V] = WelsI4x4LumaPredV_neon;
+
+ pFuncList->pfGetLumaI16x16Pred[I16_PRED_V] = WelsI16x16LumaPredV_neon;
+ pFuncList->pfGetLumaI16x16Pred[I16_PRED_H] = WelsI16x16LumaPredH_neon;
+ pFuncList->pfGetLumaI16x16Pred[I16_PRED_DC] = WelsI16x16LumaPredDc_neon;
+ pFuncList->pfGetLumaI16x16Pred[I16_PRED_P] = WelsI16x16LumaPredPlane_neon;
+
+ pFuncList->pfGetChromaPred[C_PRED_DC] = WelsIChromaPredDc_neon;
+ pFuncList->pfGetChromaPred[C_PRED_V] = WelsIChromaPredV_neon;
+ pFuncList->pfGetChromaPred[C_PRED_P] = WelsIChromaPredPlane_neon;
+ pFuncList->pfGetChromaPred[C_PRED_H] = WelsIChromaPredH_neon;
+ }
+#endif
+
#ifdef X86_ASM
if (kuiCpuFlag & WELS_CPU_MMXEXT) {
pFuncList->pfGetLumaI4x4Pred[I4_PRED_DDR] = WelsI4x4LumaPredDDR_mmx;