ref: 914b160fb594b7c6d183aba6b40258b66d0ce054
parent: 0b9be9320518dfe08057ad0e091e604d6d7d67a3
author: Luca Barbato <lu_zero@gentoo.org>
date: Tue Apr 18 18:55:53 EDT 2017
ppc: h predictor 8x8. Slightly faster with the current compiler. Change-Id: Iae225fac08395eb430c97a2abec69c60f5cf5c47
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -318,8 +318,9 @@
vpx_tm_predictor_4x4_vsx)
INTRA_PRED_TEST(VSX, TestIntraPred8, vpx_dc_predictor_8x8_vsx, NULL, NULL, NULL,
- NULL, NULL, vpx_d45_predictor_8x8_vsx, NULL, NULL, NULL, NULL,
- vpx_d63_predictor_8x8_vsx, vpx_tm_predictor_8x8_vsx)
+ NULL, vpx_h_predictor_8x8_vsx, vpx_d45_predictor_8x8_vsx, NULL,
+ NULL, NULL, NULL, vpx_d63_predictor_8x8_vsx,
+ vpx_tm_predictor_8x8_vsx)
INTRA_PRED_TEST(VSX, TestIntraPred16, vpx_dc_predictor_16x16_vsx,
vpx_dc_left_predictor_16x16_vsx, vpx_dc_top_predictor_16x16_vsx,
--- a/vpx_dsp/ppc/intrapred_vsx.c
+++ b/vpx_dsp/ppc/intrapred_vsx.c
@@ -56,6 +56,38 @@
vec_vsx_st(vec_sel(v3, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst);
}
+void vpx_h_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride,
+ const uint8_t *above, const uint8_t *left) {  // 8x8 horizontal predictor (VSX): dst row r is built from a splat of left[r]
+ const uint8x16_t d = vec_vsx_ld(0, left);  // NOTE(review): 16-lane vector type but only lanes 0..7 are used below - confirm left[] tolerates the wider load
+ const uint8x16_t v0 = vec_splat(d, 0);  // vN holds element N of d replicated across the whole vector
+ const uint8x16_t v1 = vec_splat(d, 1);
+ const uint8x16_t v2 = vec_splat(d, 2);
+ const uint8x16_t v3 = vec_splat(d, 3);
+ // v4..v7 follow the same pattern for elements 4..7 of d.
+ const uint8x16_t v4 = vec_splat(d, 4);
+ const uint8x16_t v5 = vec_splat(d, 5);
+ const uint8x16_t v6 = vec_splat(d, 6);
+ const uint8x16_t v7 = vec_splat(d, 7);
+
+ (void)above;  // the above row is not read by this predictor; the cast marks the parameter as intentionally unused
+ // Per row: reload the current dst vector, combine it with vN through xxpermdi, store the result, then step dst by stride.
+ vec_vsx_st(xxpermdi(v0, vec_vsx_ld(0, dst), 1), 0, dst);  // row 0. NOTE(review): presumably keeps the dst bytes past the 8-pixel row unchanged, yet it is still a full-vector read-modify-write - confirm against xxpermdi's definition
+ dst += stride;  // advance to the next row
+ vec_vsx_st(xxpermdi(v1, vec_vsx_ld(0, dst), 1), 0, dst);  // row 1
+ dst += stride;
+ vec_vsx_st(xxpermdi(v2, vec_vsx_ld(0, dst), 1), 0, dst);  // row 2
+ dst += stride;
+ vec_vsx_st(xxpermdi(v3, vec_vsx_ld(0, dst), 1), 0, dst);  // row 3
+ dst += stride;
+ vec_vsx_st(xxpermdi(v4, vec_vsx_ld(0, dst), 1), 0, dst);  // row 4
+ dst += stride;
+ vec_vsx_st(xxpermdi(v5, vec_vsx_ld(0, dst), 1), 0, dst);  // row 5
+ dst += stride;
+ vec_vsx_st(xxpermdi(v6, vec_vsx_ld(0, dst), 1), 0, dst);  // row 6
+ dst += stride;
+ vec_vsx_st(xxpermdi(v7, vec_vsx_ld(0, dst), 1), 0, dst);  // row 7 (last row, so dst is not advanced afterwards)
+}
+
void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride,
const uint8_t *above, const uint8_t *left) {
const uint8x16_t d = vec_vsx_ld(0, left);
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -81,7 +81,7 @@
specialize qw/vpx_d63_predictor_8x8 ssse3 vsx/;
add_proto qw/void vpx_h_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";
-specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2/;
+specialize qw/vpx_h_predictor_8x8 neon dspr2 msa sse2 vsx/;
add_proto qw/void vpx_d117_predictor_8x8/, "uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left";