ref: 4950dbceaf22f0faefabdc61bb168c7bac1ab510
parent: 09f9c5d7f90bd19dfc1994926a6e35680ba9545c
 parent: 3ea537c0eeb60d33b5661e965384ca4a2ecdcded
	author: James Zern <jzern@google.com>
	date: Thu Feb 18 13:47:48 EST 2016
	
Merge changes from topic 'rm-loopfilter-count-param'

* changes:
  lpf_8_test: remove unneeded function wrapper
  remove loopfilter 'count' param TODOs
  split vpx_highbd_lpf_horizontal_16 in two
  split vpx_lpf_horizontal_16 in two
  vpx_highbd_lpf_horizontal_4: remove unused count param
  vpx_highbd_lpf_horizontal_8: remove unused count param
  vpx_highbd_lpf_vertical_4: remove unused count param
  vpx_highbd_lpf_vertical_8: remove unused count param
  vpx_lpf_horizontal_4: remove unused count param
  vpx_lpf_horizontal_8: remove unused count param
  vpx_lpf_vertical_4: remove unused count param
  vpx_lpf_vertical_8: remove unused count param
  lpf_8_test: add missing dspr2 tests
  lpf_8_test: add missing vpx_lpf_horizontal_4 tests
  lpf_8_test: add missing vpx_lpf_vertical_4 tests
  lpf_8_test: simplify function wrapper generation
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -37,7 +37,7 @@
#if CONFIG_VP9_HIGHBITDEPTH
typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
- int count, int bd);
+ int bd);
typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
@@ -44,8 +44,7 @@
const uint8_t *thresh1, int bd);
#else
typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
- int count);
+ const uint8_t *limit, const uint8_t *thresh);
typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
@@ -52,105 +51,9 @@
const uint8_t *thresh1);
#endif // CONFIG_VP9_HIGHBITDEPTH
-typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t;
+typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                              int count, int bd) {- vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                           int count, int bd) {- vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                                   int count, int bd) {- vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
-}
-
-void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                                int count, int bd) {- vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
-}
-#else
-void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                              int count) {- vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                           int count) {- vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                                   int count) {- vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                                int count) {- vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // HAVE_SSE2
-
-#if HAVE_NEON_ASM
-#if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                              int count) {- vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                           int count) {- vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                                   int count) {- vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                                int count) {- vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
-}
-#endif // CONFIG_VP9_HIGHBITDEPTH
-#endif // HAVE_NEON_ASM
-
-#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                             int count) {- vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                           int count) {- vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-#endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-
 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {public:
   virtual ~Loop8Test6Param() {}@@ -158,7 +61,6 @@
loopfilter_op_ = GET_PARAM(0);
ref_loopfilter_op_ = GET_PARAM(1);
bit_depth_ = GET_PARAM(2);
- count_ = GET_PARAM(3);
mask_ = (1 << bit_depth_) - 1;
}
@@ -166,7 +68,6 @@
protected:
int bit_depth_;
- int count_;
int mask_;
loop_op_t loopfilter_op_;
loop_op_t ref_loopfilter_op_;
@@ -253,13 +154,13 @@
ref_s[j] = s[j];
}
#if CONFIG_VP9_HIGHBITDEPTH
- ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
+ ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
#else
- ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
+ ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
#endif // CONFIG_VP9_HIGHBITDEPTH
     for (int j = 0; j < kNumCoeffs; ++j) {@@ -325,13 +226,13 @@
ref_s[j] = s[j];
}
#if CONFIG_VP9_HIGHBITDEPTH
- ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
+ ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
#else
- ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
+ ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
ASM_REGISTER_STATE_CHECK(
- loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
+ loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
#endif // CONFIG_VP9_HIGHBITDEPTH
     for (int j = 0; j < kNumCoeffs; ++j) {err_count += ref_s[j] != s[j];
@@ -529,6 +430,16 @@
using std::tr1::make_tuple;
+#if HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+ MMX, Loop8Test6Param,
+ ::testing::Values(
+ make_tuple(&vpx_lpf_horizontal_4_mmx,
+ &vpx_lpf_horizontal_4_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_mmx,
+ &vpx_lpf_vertical_4_c, 8)));
+#endif // HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH
+
#if HAVE_SSE2
#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
@@ -535,64 +446,69 @@
SSE2, Loop8Test6Param,
::testing::Values(
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+ &vpx_highbd_lpf_horizontal_4_c, 8),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
- &vpx_highbd_lpf_vertical_4_c, 8, 1),
+ &vpx_highbd_lpf_vertical_4_c, 8),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
- &vpx_highbd_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 8, 2),
+ &vpx_highbd_lpf_horizontal_8_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+ &vpx_highbd_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+ &vpx_highbd_lpf_horizontal_edge_16_c, 8),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
- &vpx_highbd_lpf_vertical_8_c, 8, 1),
- make_tuple(&wrapper_vertical_16_sse2,
- &wrapper_vertical_16_c, 8, 1),
+ &vpx_highbd_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+ &vpx_highbd_lpf_vertical_16_c, 8),
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+ &vpx_highbd_lpf_horizontal_4_c, 10),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
- &vpx_highbd_lpf_vertical_4_c, 10, 1),
+ &vpx_highbd_lpf_vertical_4_c, 10),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
- &vpx_highbd_lpf_horizontal_8_c, 10, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 10, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 10, 2),
+ &vpx_highbd_lpf_horizontal_8_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+ &vpx_highbd_lpf_horizontal_edge_8_c, 10),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+ &vpx_highbd_lpf_horizontal_edge_16_c, 10),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
- &vpx_highbd_lpf_vertical_8_c, 10, 1),
- make_tuple(&wrapper_vertical_16_sse2,
- &wrapper_vertical_16_c, 10, 1),
+ &vpx_highbd_lpf_vertical_8_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+ &vpx_highbd_lpf_vertical_16_c, 10),
make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
- &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+ &vpx_highbd_lpf_horizontal_4_c, 12),
make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
- &vpx_highbd_lpf_vertical_4_c, 12, 1),
+ &vpx_highbd_lpf_vertical_4_c, 12),
make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
- &vpx_highbd_lpf_horizontal_8_c, 12, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 12, 1),
- make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
- &vpx_highbd_lpf_horizontal_16_c, 12, 2),
+ &vpx_highbd_lpf_horizontal_8_c, 12),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+ &vpx_highbd_lpf_horizontal_edge_8_c, 12),
+ make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+ &vpx_highbd_lpf_horizontal_edge_16_c, 12),
make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
- &vpx_highbd_lpf_vertical_8_c, 12, 1),
- make_tuple(&wrapper_vertical_16_sse2,
- &wrapper_vertical_16_c, 12, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 8, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 10, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 12, 1)));
+ &vpx_highbd_lpf_vertical_8_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
+ &vpx_highbd_lpf_vertical_16_c, 12),
+ make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+ &vpx_highbd_lpf_vertical_16_dual_c, 8),
+ make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+ &vpx_highbd_lpf_vertical_16_dual_c, 10),
+ make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
+ &vpx_highbd_lpf_vertical_16_dual_c, 12)));
#else
INSTANTIATE_TEST_CASE_P(
SSE2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
- make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1),
- make_tuple(&wrapper_vertical_16_dual_sse2,
- &wrapper_vertical_16_dual_c, 8, 1)));
+ make_tuple(&vpx_lpf_horizontal_8_sse2,
+ &vpx_lpf_horizontal_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
+ &vpx_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_sse2,
+ &vpx_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_sse2,
+ &vpx_lpf_vertical_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dual_sse2,
+ &vpx_lpf_vertical_16_dual_c, 8)));
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif
@@ -600,9 +516,10 @@
INSTANTIATE_TEST_CASE_P(
AVX2, Loop8Test6Param,
::testing::Values(
- make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
- 2)));
+ make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
+ &vpx_lpf_horizontal_edge_16_c, 8)));
#endif
#if HAVE_SSE2
@@ -659,23 +576,23 @@
#if HAVE_NEON_ASM
// Using #if inside the macro is unsupported on MSVS but the tests are not
// currently built for MSVS with ARM and NEON.
- make_tuple(&vpx_lpf_horizontal_16_neon,
- &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_neon,
- &vpx_lpf_horizontal_16_c, 8, 2),
- make_tuple(&wrapper_vertical_16_neon,
- &wrapper_vertical_16_c, 8, 1),
- make_tuple(&wrapper_vertical_16_dual_neon,
- &wrapper_vertical_16_dual_c, 8, 1),
+ make_tuple(&vpx_lpf_horizontal_edge_8_neon,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_neon,
+ &vpx_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_neon,
+ &vpx_lpf_vertical_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dual_neon,
+ &vpx_lpf_vertical_16_dual_c, 8),
#endif // HAVE_NEON_ASM
make_tuple(&vpx_lpf_horizontal_8_neon,
- &vpx_lpf_horizontal_8_c, 8, 1),
+ &vpx_lpf_horizontal_8_c, 8),
make_tuple(&vpx_lpf_vertical_8_neon,
- &vpx_lpf_vertical_8_c, 8, 1),
+ &vpx_lpf_vertical_8_c, 8),
make_tuple(&vpx_lpf_horizontal_4_neon,
- &vpx_lpf_horizontal_4_c, 8, 1),
+ &vpx_lpf_horizontal_4_c, 8),
make_tuple(&vpx_lpf_vertical_4_neon,
- &vpx_lpf_vertical_4_c, 8, 1)));
+ &vpx_lpf_vertical_4_c, 8)));
INSTANTIATE_TEST_CASE_P(
NEON, Loop8Test9Param,
::testing::Values(
@@ -692,15 +609,58 @@
#endif // CONFIG_VP9_HIGHBITDEPTH
#endif // HAVE_NEON
+#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+ DSPR2, Loop8Test6Param,
+ ::testing::Values(
+ make_tuple(&vpx_lpf_horizontal_4_dspr2,
+ &vpx_lpf_horizontal_4_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dspr2,
+ &vpx_lpf_horizontal_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_8,
+ &vpx_lpf_horizontal_edge_8, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16,
+ &vpx_lpf_horizontal_edge_16, 8),
+ make_tuple(&vpx_lpf_vertical_4_dspr2,
+ &vpx_lpf_vertical_4_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dspr2,
+ &vpx_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dspr2,
+ &vpx_lpf_vertical_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_dual_dspr2,
+ &vpx_lpf_vertical_16_dual_c, 8)));
+
+INSTANTIATE_TEST_CASE_P(
+ DSPR2, Loop8Test9Param,
+ ::testing::Values(
+ make_tuple(&vpx_lpf_horizontal_4_dual_dspr2,
+ &vpx_lpf_horizontal_4_dual_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_dual_dspr2,
+ &vpx_lpf_horizontal_8_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_dual_dspr2,
+ &vpx_lpf_vertical_4_dual_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_dual_dspr2,
+ &vpx_lpf_vertical_8_dual_c, 8)));
+#endif // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
+
#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test6Param,
::testing::Values(
- make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
- make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
- make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
- make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
+ make_tuple(&vpx_lpf_horizontal_4_msa,
+ &vpx_lpf_horizontal_4_c, 8),
+ make_tuple(&vpx_lpf_horizontal_8_msa,
+ &vpx_lpf_horizontal_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_8_msa,
+ &vpx_lpf_horizontal_edge_8_c, 8),
+ make_tuple(&vpx_lpf_horizontal_edge_16_msa,
+ &vpx_lpf_horizontal_edge_16_c, 8),
+ make_tuple(&vpx_lpf_vertical_4_msa,
+ &vpx_lpf_vertical_4_c, 8),
+ make_tuple(&vpx_lpf_vertical_8_msa,
+ &vpx_lpf_vertical_8_c, 8),
+ make_tuple(&vpx_lpf_vertical_16_msa,
+ &vpx_lpf_vertical_16_c, 8)));
INSTANTIATE_TEST_CASE_P(
MSA, Loop8Test9Param,
--- a/vp10/common/loopfilter.c
+++ b/vp10/common/loopfilter.c
@@ -324,7 +324,6 @@
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
- // TODO(yunqingwang): count in loopfilter functions should be removed.
     if (mask & 1) {       if ((mask_16x16_0 | mask_16x16_1) & 1) {         if ((mask_16x16_0 & mask_16x16_1) & 1) {@@ -345,11 +344,10 @@
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
         } else if (mask_8x8_0 & 1) {- vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
- 1);
+ vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
         } else {vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ lfi1->hev_thr);
}
}
@@ -359,11 +357,10 @@
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
         } else if (mask_4x4_0 & 1) {- vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
- 1);
+ vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
         } else {vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ lfi1->hev_thr);
}
}
@@ -374,10 +371,10 @@
lfi1->hev_thr);
         } else if (mask_4x4_int_0 & 1) {vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ lfi0->hev_thr);
         } else {vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ lfi1->hev_thr);
}
}
}
@@ -424,7 +421,6 @@
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
- // TODO(yunqingwang): count in loopfilter functions should be removed.
     if (mask & 1) {       if ((mask_16x16_0 | mask_16x16_1) & 1) {         if ((mask_16x16_0 & mask_16x16_1) & 1) {@@ -446,10 +442,10 @@
lfi1->hev_thr, bd);
         } else if (mask_8x8_0 & 1) {vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ lfi0->hev_thr, bd);
         } else {vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ lfi1->lim, lfi1->hev_thr, bd);
}
}
@@ -460,10 +456,10 @@
lfi1->hev_thr, bd);
         } else if (mask_4x4_0 & 1) {vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ lfi0->hev_thr, bd);
         } else {vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ lfi1->lim, lfi1->hev_thr, bd);
}
}
@@ -474,10 +470,10 @@
lfi1->hev_thr, bd);
         } else if (mask_4x4_int_0 & 1) {vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ lfi0->hev_thr, bd);
         } else {vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ lfi1->lim, lfi1->hev_thr, bd);
}
}
}
@@ -514,12 +510,12 @@
     if (mask & 1) {       if (mask_16x16 & 1) {         if ((mask_16x16 & 3) == 3) {- vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2);
+ vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
count = 2;
         } else {- vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
}
       } else if (mask_8x8 & 1) {         if ((mask_8x8 & 3) == 3) {@@ -537,18 +533,18 @@
           } else {if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
else if (mask_4x4_int & 2)
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1);
+ lfin->lim, lfin->hev_thr);
}
count = 2;
         } else {- vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
       } else if (mask_4x4 & 1) {         if ((mask_4x4 & 3) == 3) {@@ -565,22 +561,22 @@
           } else {if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
else if (mask_4x4_int & 2)
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1);
+ lfin->lim, lfin->hev_thr);
}
count = 2;
         } else {- vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
       } else if (mask_4x4_int & 1) {vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
}
s += 8 * count;
@@ -611,12 +607,12 @@
     if (mask & 1) {       if (mask_16x16 & 1) {         if ((mask_16x16 & 3) == 3) {- vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2, bd);
+ vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
count = 2;
         } else {- vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
}
       } else if (mask_8x8 & 1) {         if ((mask_8x8 & 3) == 3) {@@ -635,20 +631,20 @@
           } else {             if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
             } else if (mask_4x4_int & 2) {vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
         } else {vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
           if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
       } else if (mask_4x4 & 1) {@@ -667,25 +663,25 @@
           } else {             if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
             } else if (mask_4x4_int & 2) {vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
         } else {vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
           if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
       } else if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
s += 8 * count;
@@ -1127,13 +1123,13 @@
       if (mask_16x16 & 1) {vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       } else if (mask_8x8 & 1) {- vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       } else if (mask_4x4 & 1) {- vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
}
}
if (mask_4x4_int & 1)
- vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@@ -1163,15 +1159,15 @@
lfi->hev_thr, bd);
       } else if (mask_8x8 & 1) {vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
       } else if (mask_4x4 & 1) {vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
if (mask_4x4_int & 1)
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
--- a/vp9/common/vp9_loopfilter.c
+++ b/vp9/common/vp9_loopfilter.c
@@ -324,7 +324,6 @@
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
- // TODO(yunqingwang): count in loopfilter functions should be removed.
     if (mask & 1) {       if ((mask_16x16_0 | mask_16x16_1) & 1) {         if ((mask_16x16_0 & mask_16x16_1) & 1) {@@ -345,11 +344,10 @@
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
         } else if (mask_8x8_0 & 1) {- vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
- 1);
+ vpx_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
         } else {vpx_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ lfi1->hev_thr);
}
}
@@ -359,11 +357,10 @@
lfi0->hev_thr, lfi1->mblim, lfi1->lim,
lfi1->hev_thr);
         } else if (mask_4x4_0 & 1) {- vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr,
- 1);
+ vpx_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim, lfi0->hev_thr);
         } else {vpx_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ lfi1->hev_thr);
}
}
@@ -374,10 +371,10 @@
lfi1->hev_thr);
         } else if (mask_4x4_int_0 & 1) {vpx_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1);
+ lfi0->hev_thr);
         } else {vpx_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim, lfi1->lim,
- lfi1->hev_thr, 1);
+ lfi1->hev_thr);
}
}
}
@@ -424,7 +421,6 @@
const loop_filter_thresh *lfi0 = lfi_n->lfthr + *lfl;
const loop_filter_thresh *lfi1 = lfi_n->lfthr + *(lfl + lfl_forward);
- // TODO(yunqingwang): count in loopfilter functions should be removed.
     if (mask & 1) {       if ((mask_16x16_0 | mask_16x16_1) & 1) {         if ((mask_16x16_0 & mask_16x16_1) & 1) {@@ -446,10 +442,10 @@
lfi1->hev_thr, bd);
         } else if (mask_8x8_0 & 1) {vpx_highbd_lpf_vertical_8(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ lfi0->hev_thr, bd);
         } else {vpx_highbd_lpf_vertical_8(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ lfi1->lim, lfi1->hev_thr, bd);
}
}
@@ -460,10 +456,10 @@
lfi1->hev_thr, bd);
         } else if (mask_4x4_0 & 1) {vpx_highbd_lpf_vertical_4(s, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ lfi0->hev_thr, bd);
         } else {vpx_highbd_lpf_vertical_4(s + 8 * pitch, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ lfi1->lim, lfi1->hev_thr, bd);
}
}
@@ -474,10 +470,10 @@
lfi1->hev_thr, bd);
         } else if (mask_4x4_int_0 & 1) {vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi0->mblim, lfi0->lim,
- lfi0->hev_thr, 1, bd);
+ lfi0->hev_thr, bd);
         } else {vpx_highbd_lpf_vertical_4(s + 8 * pitch + 4, pitch, lfi1->mblim,
- lfi1->lim, lfi1->hev_thr, 1, bd);
+ lfi1->lim, lfi1->hev_thr, bd);
}
}
}
@@ -514,12 +510,12 @@
     if (mask & 1) {       if (mask_16x16 & 1) {         if ((mask_16x16 & 3) == 3) {- vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2);
+ vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
count = 2;
         } else {- vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr);
}
       } else if (mask_8x8 & 1) {         if ((mask_8x8 & 3) == 3) {@@ -537,18 +533,18 @@
           } else {if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
else if (mask_4x4_int & 2)
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1);
+ lfin->lim, lfin->hev_thr);
}
count = 2;
         } else {- vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
       } else if (mask_4x4 & 1) {         if ((mask_4x4 & 3) == 3) {@@ -565,22 +561,22 @@
           } else {if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
else if (mask_4x4_int & 2)
vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1);
+ lfin->lim, lfin->hev_thr);
}
count = 2;
         } else {- vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
if (mask_4x4_int & 1)
vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
       } else if (mask_4x4_int & 1) {vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1);
+ lfi->hev_thr);
}
}
s += 8 * count;
@@ -611,12 +607,12 @@
     if (mask & 1) {       if (mask_16x16 & 1) {         if ((mask_16x16 & 3) == 3) {- vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 2, bd);
+ vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
count = 2;
         } else {- vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim,
+ lfi->hev_thr, bd);
}
       } else if (mask_8x8 & 1) {         if ((mask_8x8 & 3) == 3) {@@ -635,20 +631,20 @@
           } else {             if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
             } else if (mask_4x4_int & 2) {vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
         } else {vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
           if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
       } else if (mask_4x4 & 1) {@@ -667,25 +663,25 @@
           } else {             if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
             } else if (mask_4x4_int & 2) {vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim,
- lfin->lim, lfin->hev_thr, 1, bd);
+ lfin->lim, lfin->hev_thr, bd);
}
}
count = 2;
         } else {vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
           if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim,
- lfi->lim, lfi->hev_thr, 1, bd);
+ lfi->lim, lfi->hev_thr, bd);
}
}
       } else if (mask_4x4_int & 1) {vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
s += 8 * count;
@@ -1102,13 +1098,13 @@
       if (mask_16x16 & 1) {vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       } else if (mask_8x8 & 1) {- vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
       } else if (mask_4x4 & 1) {- vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
}
}
if (mask_4x4_int & 1)
- vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, 1);
+ vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
@@ -1138,15 +1134,15 @@
lfi->hev_thr, bd);
       } else if (mask_8x8 & 1) {vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
       } else if (mask_4x4 & 1) {vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
}
}
if (mask_4x4_int & 1)
vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim,
- lfi->hev_thr, 1, bd);
+ lfi->hev_thr, bd);
s += 8;
lfl += 1;
mask_16x16 >>= 1;
--- a/vpx_dsp/arm/loopfilter_4_neon.asm
+++ b/vpx_dsp/arm/loopfilter_4_neon.asm
@@ -16,15 +16,12 @@
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
;
; void vpx_lpf_horizontal_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
@@ -31,22 +28,16 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_horizontal_4_neon| PROC
     push        {lr}     vld1.8      {d0[]}, [r2]               ; duplicate *blimit- ldr r12, [sp, #8] ; load count
ldr r2, [sp, #4] ; load thresh
add r1, r1, r1 ; double pitch
- cmp r12, #0
- beq end_vpx_lf_h_edge
-
     vld1.8      {d1[]}, [r3]               ; duplicate *limit     vld1.8      {d2[]}, [r2]               ; duplicate *thresh-count_lf_h_loop
sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r3, r2, r1, lsr #1 ; set to 3 lines down
@@ -69,25 +60,17 @@
     vst1.u8     {d6}, [r2@64], r1          ; store oq0     vst1.u8     {d7}, [r3@64], r1          ; store oq1- add r0, r0, #8
- subs r12, r12, #1
- bne count_lf_h_loop
-
-end_vpx_lf_h_edge
     pop         {pc}ENDP ; |vpx_lpf_horizontal_4_neon|
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
;
; void vpx_lpf_vertical_4_neon(uint8_t *s,
; int p /* pitch */,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
;
; r0 uint8_t *s,
; r1 int p, /* pitch */
@@ -94,22 +77,17 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_vertical_4_neon| PROC
     push        {lr}     vld1.8      {d0[]}, [r2]              ; duplicate *blimit- ldr r12, [sp, #8] ; load count
     vld1.8      {d1[]}, [r3]              ; duplicate *limitldr r3, [sp, #4] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
- cmp r12, #0
- beq end_vpx_lf_v_edge
     vld1.8      {d2[]}, [r3]              ; duplicate *thresh-count_lf_v_loop
     vld1.u8     {d3}, [r2], r1             ; load s data
     vld1.u8     {d4}, [r2], r1
     vld1.u8     {d5}, [r2], r1
@@ -149,12 +127,6 @@
     vst4.8      {d4[6], d5[6], d6[6], d7[6]}, [r0], r1     vst4.8      {d4[7], d5[7], d6[7], d7[7]}, [r0]- add r0, r0, r1, lsl #3 ; s += pitch * 8
- subs r12, r12, #1
- subne r2, r0, #4 ; move s pointer down by 4 columns
- bne count_lf_v_loop
-
-end_vpx_lf_v_edge
     pop         {pc}ENDP ; |vpx_lpf_vertical_4_neon|
--- a/vpx_dsp/arm/loopfilter_4_neon.c
+++ b/vpx_dsp/arm/loopfilter_4_neon.c
@@ -115,22 +115,18 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-        int count) {+        const uint8_t *thresh) {int i;
uint8_t *s, *psrc;
uint8x8_t dblimit, dlimit, dthresh;
uint8x8_t d3u8, d4u8, d5u8, d6u8, d7u8, d16u8, d17u8, d18u8;
- if (count == 0) // end_vpx_lf_h_edge
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
psrc = src - (pitch << 2);
-    for (i = 0; i < count; i++) {+    for (i = 0; i < 1; i++) {s = psrc + i * 8;
d3u8 = vld1_u8(s);
@@ -170,8 +166,7 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-        int count) {+        const uint8_t *thresh) {int i, pitch8;
uint8_t *s;
uint8x8_t dblimit, dlimit, dthresh;
@@ -181,15 +176,12 @@
uint8x8x2_t d2tmp8, d2tmp9, d2tmp10, d2tmp11;
uint8x8x4_t d4Result;
- if (count == 0) // end_vpx_lf_h_edge
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
pitch8 = pitch * 8;
-    for (i = 0; i < count; i++, src += pitch8) {+    for (i = 0; i < 1; i++, src += pitch8) {s = src - (i + 1) * 4;
d3u8 = vld1_u8(s);
--- a/vpx_dsp/arm/loopfilter_8_neon.asm
+++ b/vpx_dsp/arm/loopfilter_8_neon.asm
@@ -16,35 +16,26 @@
; Currently vpx only works on iterations 8 at a time. The vp8 loop filter
; works on 16 iterations at a time.
-; TODO(fgalligan): See about removing the count code as this function is only
-; called with a count of 1.
;
; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_horizontal_8_neon| PROC
     push        {r4-r5, lr}     vld1.8      {d0[]}, [r2]               ; duplicate *blimit- ldr r12, [sp, #16] ; load count
ldr r2, [sp, #12] ; load thresh
add r1, r1, r1 ; double pitch
- cmp r12, #0
- beq end_vpx_mblf_h_edge
-
     vld1.8      {d1[]}, [r3]               ; duplicate *limit     vld1.8      {d2[]}, [r2]               ; duplicate *thresh-count_mblf_h_loop
sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines
add r2, r3, r1, lsr #1 ; set to 3 lines down
@@ -69,11 +60,6 @@
     vst1.u8     {d4}, [r2@64], r1          ; store oq1     vst1.u8     {d5}, [r3@64], r1          ; store oq2- add r0, r0, #8
- subs r12, r12, #1
- bne count_mblf_h_loop
-
-end_vpx_mblf_h_edge
     pop         {r4-r5, pc}ENDP ; |vpx_lpf_horizontal_8_neon|
@@ -82,8 +68,7 @@
; int pitch,
; const uint8_t *blimit,
; const uint8_t *limit,
-; const uint8_t *thresh,
-; int count)
+; const uint8_t *thresh)
;
; r0 uint8_t *s,
; r1 int pitch,
@@ -90,22 +75,17 @@
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-; sp+4 int count
|vpx_lpf_vertical_8_neon| PROC
     push        {r4-r5, lr}     vld1.8      {d0[]}, [r2]              ; duplicate *blimit- ldr r12, [sp, #16] ; load count
     vld1.8      {d1[]}, [r3]              ; duplicate *limitldr r3, [sp, #12] ; load thresh
sub r2, r0, #4 ; move s pointer down by 4 columns
- cmp r12, #0
- beq end_vpx_mblf_v_edge
     vld1.8      {d2[]}, [r3]              ; duplicate *thresh-count_mblf_v_loop
     vld1.u8     {d3}, [r2], r1             ; load s data
     vld1.u8     {d4}, [r2], r1
     vld1.u8     {d5}, [r2], r1
@@ -156,12 +136,6 @@
     vst2.8      {d4[6], d5[6]}, [r3], r1     vst2.8      {d4[7], d5[7]}, [r3]- add r0, r0, r1, lsl #3 ; s += pitch * 8
- subs r12, r12, #1
- subne r2, r0, #4 ; move s pointer down by 4 columns
- bne count_mblf_v_loop
-
-end_vpx_mblf_v_edge
     pop         {r4-r5, pc}ENDP ; |vpx_lpf_vertical_8_neon|
--- a/vpx_dsp/arm/loopfilter_8_neon.c
+++ b/vpx_dsp/arm/loopfilter_8_neon.c
@@ -268,8 +268,7 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-        int count) {+        const uint8_t *thresh) {int i;
uint8_t *s, *psrc;
uint8x8_t dblimit, dlimit, dthresh;
@@ -276,15 +275,12 @@
uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
uint8x8_t d16u8, d17u8, d18u8;
- if (count == 0) // end_vpx_mblf_h_edge
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
psrc = src - (pitch << 2);
-    for (i = 0; i < count; i++) {+    for (i = 0; i < 1; i++) {s = psrc + i * 8;
d3u8 = vld1_u8(s);
@@ -328,8 +324,7 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-        int count) {+        const uint8_t *thresh) {int i;
uint8_t *s;
uint8x8_t dblimit, dlimit, dthresh;
@@ -341,14 +336,11 @@
uint8x8x4_t d4Result;
uint8x8x2_t d2Result;
- if (count == 0)
- return;
-
dblimit = vld1_u8(blimit);
dlimit = vld1_u8(limit);
dthresh = vld1_u8(thresh);
-    for (i = 0; i < count; i++) {+    for (i = 0; i < 1; i++) {s = src + (i * (pitch << 3)) - 4;
d3u8 = vld1_u8(s);
--- a/vpx_dsp/arm/loopfilter_mb_neon.asm
+++ b/vpx_dsp/arm/loopfilter_mb_neon.asm
@@ -8,27 +8,28 @@
; be found in the AUTHORS file in the root of the source tree.
;
- EXPORT |vpx_lpf_horizontal_16_neon|
+ EXPORT |vpx_lpf_horizontal_edge_8_neon|
+ EXPORT |vpx_lpf_horizontal_edge_16_neon|
EXPORT |vpx_lpf_vertical_16_neon|
ARM
AREA ||.text||, CODE, READONLY, ALIGN=2
-; void vpx_lpf_horizontal_16_neon(uint8_t *s, int p,
-; const uint8_t *blimit,
-; const uint8_t *limit,
-; const uint8_t *thresh
-; int count)
+; void mb_lpf_horizontal_edge(uint8_t *s, int p,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh,
+; int count)
; r0 uint8_t *s,
; r1 int p, /* pitch */
; r2 const uint8_t *blimit,
; r3 const uint8_t *limit,
; sp const uint8_t *thresh,
-|vpx_lpf_horizontal_16_neon| PROC
+; r12 int count
+|mb_lpf_horizontal_edge| PROC
     push        {r4-r8, lr}     vpush       {d8-d15}ldr r4, [sp, #88] ; load thresh
- ldr r12, [sp, #92] ; load count
h_count
     vld1.8      {d16[]}, [r2]              ; load *blimit
@@ -115,7 +116,35 @@
     vpop        {d8-d15}     pop         {r4-r8, pc}- ENDP ; |vpx_lpf_horizontal_16_neon|
+ ENDP ; |mb_lpf_horizontal_edge|
+
+; void vpx_lpf_horizontal_edge_8_neon(uint8_t *s, int pitch,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh)
+; r0 uint8_t *s,
+; r1 int pitch,
+; r2 const uint8_t *blimit,
+; r3 const uint8_t *limit,
+; sp const uint8_t *thresh
+|vpx_lpf_horizontal_edge_8_neon| PROC
+ mov r12, #1
+ b mb_lpf_horizontal_edge
+ ENDP ; |vpx_lpf_horizontal_edge_8_neon|
+
+; void vpx_lpf_horizontal_edge_16_neon(uint8_t *s, int pitch,
+; const uint8_t *blimit,
+; const uint8_t *limit,
+; const uint8_t *thresh)
+; r0 uint8_t *s,
+; r1 int pitch,
+; r2 const uint8_t *blimit,
+; r3 const uint8_t *limit,
+; sp const uint8_t *thresh
+|vpx_lpf_horizontal_edge_16_neon| PROC
+ mov r12, #2
+ b mb_lpf_horizontal_edge
+ ENDP ; |vpx_lpf_horizontal_edge_16_neon|
; void vpx_lpf_vertical_16_neon(uint8_t *s, int p,
; const uint8_t *blimit,
--- a/vpx_dsp/arm/loopfilter_neon.c
+++ b/vpx_dsp/arm/loopfilter_neon.c
@@ -21,8 +21,8 @@
const uint8_t *blimit1,
const uint8_t *limit1,
                                   const uint8_t *thresh1) {- vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1);
}
#if HAVE_NEON_ASM
@@ -33,8 +33,8 @@
const uint8_t *blimit1,
const uint8_t *limit1,
                                     const uint8_t *thresh1) {- vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p,
@@ -44,8 +44,8 @@
const uint8_t *blimit1,
const uint8_t *limit1,
                                   const uint8_t *thresh1) {- vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p,
--- a/vpx_dsp/loopfilter.c
+++ b/vpx_dsp/loopfilter.c
@@ -119,12 +119,12 @@
void vpx_lpf_horizontal_4_c(uint8_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
-                            const uint8_t *thresh, int count) {+                            const uint8_t *thresh) {int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
const int8_t mask = filter_mask(*limit, *blimit,
@@ -138,18 +138,17 @@
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
                                  const uint8_t *thresh1) {- vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                          int count) {+                          const uint8_t *limit, const uint8_t *thresh) {int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
@@ -163,9 +162,8 @@
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
                                const uint8_t *thresh1) {- vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1);
+ vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
}
static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat,
@@ -190,13 +188,12 @@
}
void vpx_lpf_horizontal_8_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                            int count) {+                            const uint8_t *limit, const uint8_t *thresh) {int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint8_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint8_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
@@ -213,16 +210,15 @@
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
                                  const uint8_t *thresh1) {- vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                          int count) {+                          const uint8_t *limit, const uint8_t *thresh) {int i;
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = filter_mask(*limit, *blimit,
@@ -238,9 +234,8 @@
const uint8_t *limit0, const uint8_t *thresh0,
const uint8_t *blimit1, const uint8_t *limit1,
                                const uint8_t *thresh1) {- vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1);
+ vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1);
}
static INLINE void filter16(int8_t mask, uint8_t thresh,
@@ -294,9 +289,9 @@
}
}
-void vpx_lpf_horizontal_16_c(uint8_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                             int count) {+static void mb_lpf_horizontal_edge_w(uint8_t *s, int p, const uint8_t *blimit,
+ const uint8_t *limit,
+                                     const uint8_t *thresh, int count) {int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -320,6 +315,16 @@
}
}
+void vpx_lpf_horizontal_edge_8_c(uint8_t *s, int p, const uint8_t *blimit,
+                                 const uint8_t *limit, const uint8_t *thresh) {
+ mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                                  const uint8_t *limit, const uint8_t *thresh) {
+ mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2);
+}
+
static void mb_lpf_vertical_edge_w(uint8_t *s, int p,
const uint8_t *blimit,
const uint8_t *limit,
@@ -450,12 +455,12 @@
void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int p /* pitch */,
const uint8_t *blimit, const uint8_t *limit,
-                                   const uint8_t *thresh, int count, int bd) {+                                   const uint8_t *thresh, int bd) {int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint16_t p3 = s[-4 * p];
const uint16_t p2 = s[-3 * p];
const uint16_t p1 = s[-2 * p];
@@ -479,18 +484,18 @@
const uint8_t *limit1,
const uint8_t *thresh1,
                                         int bd) {- vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_4_c(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_horizontal_4_c(s + 8, p, blimit1, limit1, thresh1, bd);
}
void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
-                                 int count, int bd) {+                                 int bd) {int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = highbd_filter_mask(*limit, *blimit,
@@ -508,9 +513,9 @@
const uint8_t *limit1,
const uint8_t *thresh1,
                                       int bd) {- vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd);
vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1, bd);
+ thresh1, bd);
}
static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat,
@@ -536,12 +541,12 @@
void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
-                                   int count, int bd) {+                                   int bd) {int i;
// loop filter designed to work using chars so that we can make maximum use
// of 8 bit simd instructions.
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint16_t p3 = s[-4 * p], p2 = s[-3 * p], p1 = s[-2 * p], p0 = s[-p];
const uint16_t q0 = s[0 * p], q1 = s[1 * p], q2 = s[2 * p], q3 = s[3 * p];
@@ -564,16 +569,16 @@
const uint8_t *limit1,
const uint8_t *thresh1,
                                         int bd) {- vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, 1, bd);
+ vpx_highbd_lpf_horizontal_8_c(s, p, blimit0, limit0, thresh0, bd);
+ vpx_highbd_lpf_horizontal_8_c(s + 8, p, blimit1, limit1, thresh1, bd);
}
void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh,
-                                 int count, int bd) {+                                 int bd) {int i;
-  for (i = 0; i < 8 * count; ++i) {+  for (i = 0; i < 8; ++i) {const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1];
const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3];
const int8_t mask = highbd_filter_mask(*limit, *blimit,
@@ -596,9 +601,9 @@
const uint8_t *limit1,
const uint8_t *thresh1,
                                       int bd) {- vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, 1, bd);
+ vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd);
vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1,
- thresh1, 1, bd);
+ thresh1, bd);
}
static INLINE void highbd_filter16(int8_t mask, uint8_t thresh,
@@ -664,9 +669,11 @@
}
}
-void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int p, const uint8_t *blimit,
- const uint8_t *limit, const uint8_t *thresh,
-                                    int count, int bd) {+static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+                                            int count, int bd) {int i;
// loop filter designed to work using chars so that we can make maximum use
@@ -696,6 +703,20 @@
bd);
++s;
}
+}
+
+void vpx_highbd_lpf_horizontal_edge_8_c(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+                                        const uint8_t *thresh, int bd) {
+ highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 1, bd);
+}
+
+void vpx_highbd_lpf_horizontal_edge_16_c(uint16_t *s, int p,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+                                         const uint8_t *thresh, int bd) {
+ highbd_mb_lpf_horizontal_edge_w(s, p, blimit, limit, thresh, 2, bd);
}
static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int p,
--- a/vpx_dsp/mips/loopfilter_16_msa.c
+++ b/vpx_dsp/mips/loopfilter_16_msa.c
@@ -423,11 +423,11 @@
}
}
-void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch,
- const uint8_t *b_limit_ptr,
- const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
-                               int32_t count) {+static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+ const uint8_t *thresh_ptr,
+                                   int32_t count) {   if (1 == count) {uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
uint64_t dword0, dword1;
@@ -646,6 +646,20 @@
vpx_lpf_horizontal_16_dual_msa(src, pitch, b_limit_ptr, limit_ptr,
thresh_ptr, count);
}
+}
+
+void vpx_lpf_horizontal_edge_8_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+                                   const uint8_t *thresh_ptr) {
+ mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_msa(uint8_t *src, int32_t pitch,
+ const uint8_t *b_limit_ptr,
+ const uint8_t *limit_ptr,
+                                    const uint8_t *thresh_ptr) {
+ mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2);
}
static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch,
--- a/vpx_dsp/mips/loopfilter_4_msa.c
+++ b/vpx_dsp/mips/loopfilter_4_msa.c
@@ -13,14 +13,11 @@
void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
-                              int32_t count) {+                              const uint8_t *thresh_ptr) {uint64_t p1_d, p0_d, q0_d, q1_d;
v16u8 mask, hev, flat, thresh, b_limit, limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out;
- (void)count;
-
/* load vector elements */
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
@@ -74,13 +71,10 @@
void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
-                            int32_t count) {+                            const uint8_t *thresh_ptr) {v16u8 mask, hev, flat, limit, thresh, b_limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v8i16 vec0, vec1, vec2, vec3;
-
- (void)count;
LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
--- a/vpx_dsp/mips/loopfilter_8_msa.c
+++ b/vpx_dsp/mips/loopfilter_8_msa.c
@@ -13,8 +13,7 @@
void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
-                              int32_t count) {+                              const uint8_t *thresh_ptr) {uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d;
v16u8 mask, hev, flat, thresh, b_limit, limit;
v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
@@ -23,8 +22,6 @@
v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r;
   v16i8 zero = { 0 };- (void)count;
-
/* load vector elements */
LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3);
@@ -161,8 +158,7 @@
void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch,
const uint8_t *b_limit_ptr,
const uint8_t *limit_ptr,
- const uint8_t *thresh_ptr,
-                            int32_t count) {+                            const uint8_t *thresh_ptr) {v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
v16u8 p1_out, p0_out, q0_out, q1_out;
v16u8 flat, mask, hev, thresh, b_limit, limit;
@@ -170,8 +166,6 @@
v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
   v16u8 zero = { 0 };v8i16 vec0, vec1, vec2, vec3, vec4;
-
- (void)count;
/* load vector elements */
LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);
--- a/vpx_dsp/mips/loopfilter_filters_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_filters_dspr2.c
@@ -23,8 +23,7 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-                                int count) {+                                const uint8_t *thresh) {uint8_t i;
uint32_t mask;
uint32_t hev;
@@ -117,8 +116,7 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-                              int count) {+                              const uint8_t *thresh) {uint8_t i;
uint32_t mask, hev;
uint32_t pm1, p0, p1, p2, p3, p4, p5, p6;
@@ -313,8 +311,8 @@
const uint8_t *blimit1,
const uint8_t *limit1,
                                      const uint8_t *thresh1) {- vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_horizontal_8_dual_dspr2(uint8_t *s, int p /* pitch */,
@@ -324,8 +322,8 @@
const uint8_t *blimit1,
const uint8_t *limit1,
                                      const uint8_t *thresh1) {- vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p,
@@ -335,8 +333,8 @@
const uint8_t *blimit1,
const uint8_t *limit1,
                                    const uint8_t *thresh1) {- vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1, 1);
+ vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p,
@@ -346,9 +344,8 @@
const uint8_t *blimit1,
const uint8_t *limit1,
                                    const uint8_t *thresh1) {- vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0, 1);
- vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1,
- 1);
+ vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0);
+ vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1);
}
void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p,
--- a/vpx_dsp/mips/loopfilter_mb_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_dspr2.c
@@ -23,8 +23,7 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-                                int count) {+                                const uint8_t *thresh) {uint32_t mask;
uint32_t hev, flat;
uint8_t i;
@@ -322,8 +321,7 @@
int pitch,
const uint8_t *blimit,
const uint8_t *limit,
- const uint8_t *thresh,
-                              int count) {+                              const uint8_t *thresh) {uint8_t i;
uint32_t mask, hev, flat;
uint8_t *s1, *s2, *s3, *s4;
--- a/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
+++ b/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c
@@ -19,12 +19,12 @@
#include "vpx_mem/vpx_mem.h"
#if HAVE_DSPR2
-void vpx_lpf_horizontal_16_dspr2(unsigned char *s,
- int pitch,
- const uint8_t *blimit,
- const uint8_t *limit,
- const uint8_t *thresh,
-                                 int count) {+static void mb_lpf_horizontal_edge(unsigned char *s,
+ int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+ const uint8_t *thresh,
+                                   int count) {uint32_t mask;
uint32_t hev, flat, flat2;
uint8_t i;
@@ -790,5 +790,19 @@
s = s + 4;
}
+}
+
+void vpx_lpf_horizontal_edge_8_dspr2(unsigned char *s, int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+                                     const uint8_t *thresh) {
+ mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1);
+}
+
+void vpx_lpf_horizontal_edge_16_dspr2(unsigned char *s, int pitch,
+ const uint8_t *blimit,
+ const uint8_t *limit,
+                                      const uint8_t *thresh) {
+ mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2);
}
#endif // #if HAVE_DSPR2
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -535,7 +535,7 @@
specialize qw/vpx_lpf_vertical_16_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_16_dual_neon_asm=vpx_lpf_vertical_16_dual_neon;
-add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_vertical_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_8 sse2 neon dspr2 msa/;
add_proto qw/void vpx_lpf_vertical_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
@@ -542,17 +542,21 @@
specialize qw/vpx_lpf_vertical_8_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_vertical_8_dual_neon_asm=vpx_lpf_vertical_8_dual_neon;
-add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_vertical_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_vertical_4 mmx neon dspr2 msa/;
add_proto qw/void vpx_lpf_vertical_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
specialize qw/vpx_lpf_vertical_4_dual sse2 neon dspr2 msa/;
-add_proto qw/void vpx_lpf_horizontal_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
-specialize qw/vpx_lpf_horizontal_16 sse2 avx2 neon_asm dspr2 msa/;
-$vpx_lpf_horizontal_16_neon_asm=vpx_lpf_horizontal_16_neon;
+add_proto qw/void vpx_lpf_horizontal_edge_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_edge_8 sse2 avx2 neon_asm dspr2 msa/;
+$vpx_lpf_horizontal_edge_8_neon_asm=vpx_lpf_horizontal_edge_8_neon;
-add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_horizontal_edge_16/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
+specialize qw/vpx_lpf_horizontal_edge_16 sse2 avx2 neon_asm dspr2 msa/;
+$vpx_lpf_horizontal_edge_16_neon_asm=vpx_lpf_horizontal_edge_16_neon;
+
+add_proto qw/void vpx_lpf_horizontal_8/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_8 sse2 neon dspr2 msa/;
add_proto qw/void vpx_lpf_horizontal_8_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
@@ -559,7 +563,7 @@
specialize qw/vpx_lpf_horizontal_8_dual sse2 neon_asm dspr2 msa/;
$vpx_lpf_horizontal_8_dual_neon_asm=vpx_lpf_horizontal_8_dual_neon;
-add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count";
+add_proto qw/void vpx_lpf_horizontal_4/, "uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh";
specialize qw/vpx_lpf_horizontal_4 mmx neon dspr2 msa/;
add_proto qw/void vpx_lpf_horizontal_4_dual/, "uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1";
@@ -572,28 +576,31 @@
add_proto qw/void vpx_highbd_lpf_vertical_16_dual/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_16_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_vertical_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_8 sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_vertical_8_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_vertical_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_vertical_4 sse2/;
add_proto qw/void vpx_highbd_lpf_vertical_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_vertical_4_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_horizontal_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
- specialize qw/vpx_highbd_lpf_horizontal_16 sse2/;
+ add_proto qw/void vpx_highbd_lpf_horizontal_edge_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_edge_8 sse2/;
- add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_horizontal_edge_16/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
+ specialize qw/vpx_highbd_lpf_horizontal_edge_16 sse2/;
+
+ add_proto qw/void vpx_highbd_lpf_horizontal_8/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_8_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
specialize qw/vpx_highbd_lpf_horizontal_8_dual sse2/;
- add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd";
+ add_proto qw/void vpx_highbd_lpf_horizontal_4/, "uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd";
specialize qw/vpx_highbd_lpf_horizontal_4 sse2/;
add_proto qw/void vpx_highbd_lpf_horizontal_4_dual/, "uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd";
--- a/vpx_dsp/x86/highbd_loopfilter_sse2.c
+++ b/vpx_dsp/x86/highbd_loopfilter_sse2.c
@@ -51,12 +51,10 @@
// TODO(debargha, peter): Break up large functions into smaller ones
// in this file.
-static void highbd_mb_lpf_horizontal_edge_w_sse2_8(uint16_t *s,
- int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh,
-                                                   int bd) {+void vpx_highbd_lpf_horizontal_edge_8_sse2(uint16_t *s, int p,
+ const uint8_t *_blimit,
+ const uint8_t *_limit,
+                                           const uint8_t *_thresh, int bd) {const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi16(1);
__m128i blimit, limit, thresh;
@@ -496,34 +494,19 @@
_mm_store_si128((__m128i *)(s - 0 * p), q0);
}
-static void highbd_mb_lpf_horizontal_edge_w_sse2_16(uint16_t *s,
- int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh,
-                                                    int bd) {- highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
- highbd_mb_lpf_horizontal_edge_w_sse2_8(s + 8, p, _blimit, _limit, _thresh,
- bd);
+void vpx_highbd_lpf_horizontal_edge_16_sse2(uint16_t *s, int p,
+ const uint8_t *_blimit,
+ const uint8_t *_limit,
+                                            const uint8_t *_thresh, int bd) {+ vpx_highbd_lpf_horizontal_edge_8_sse2(s, p, _blimit, _limit, _thresh, bd);
+ vpx_highbd_lpf_horizontal_edge_8_sse2(s + 8, p, _blimit, _limit, _thresh, bd);
}
-// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int p,
- const uint8_t *_blimit,
- const uint8_t *_limit,
- const uint8_t *_thresh,
-                                       int count, int bd) {- if (count == 1)
- highbd_mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh, bd);
- else
- highbd_mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh, bd);
-}
-
void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int p,
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
-                                      int count, int bd) {+                                      int bd) {DECLARE_ALIGNED(16, uint16_t, flat_op2[16]);
DECLARE_ALIGNED(16, uint16_t, flat_op1[16]);
DECLARE_ALIGNED(16, uint16_t, flat_op0[16]);
@@ -556,8 +539,6 @@
__m128i work_a;
__m128i filter1, filter2;
- (void)count;
-
   if (bd == 8) {blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
@@ -764,9 +745,8 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
                                            int bd) {- vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1,
- 1, bd);
+ vpx_highbd_lpf_horizontal_8_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
+ vpx_highbd_lpf_horizontal_8_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
}
void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int p,
@@ -773,7 +753,7 @@
const uint8_t *_blimit,
const uint8_t *_limit,
const uint8_t *_thresh,
-                                      int count, int bd) {+                                      int bd) {const __m128i zero = _mm_set1_epi16(0);
__m128i blimit, limit, thresh;
__m128i mask, hev, flat;
@@ -813,8 +793,6 @@
__m128i work_a;
__m128i filter1, filter2;
- (void)count;
-
   if (bd == 8) {blimit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_blimit), zero);
limit = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)_limit), zero);
@@ -944,9 +922,8 @@
const uint8_t *_limit1,
const uint8_t *_thresh1,
                                            int bd) {- vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, 1, bd);
- vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, 1,
- bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s, p, _blimit0, _limit0, _thresh0, bd);
+ vpx_highbd_lpf_horizontal_4_sse2(s + 8, p, _blimit1, _limit1, _thresh1, bd);
}
static INLINE void highbd_transpose(uint16_t *src[], int in_p,
@@ -1058,11 +1035,10 @@
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
-                                    int count, int bd) {+                                    int bd) {DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
uint16_t *src[1];
uint16_t *dst[1];
- (void)count;
// Transpose 8x8
src[0] = s - 4;
@@ -1071,8 +1047,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
- bd);
+ vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1112,11 +1087,10 @@
const uint8_t *blimit,
const uint8_t *limit,
const uint8_t *thresh,
-                                    int count, int bd) {+                                    int bd) {DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]);
uint16_t *src[1];
uint16_t *dst[1];
- (void)count;
// Transpose 8x8
src[0] = s - 4;
@@ -1125,8 +1099,7 @@
highbd_transpose(src, p, dst, 8, 1);
// Loop filtering
- vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1,
- bd);
+ vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1181,8 +1154,8 @@
highbd_transpose(src, p, dst, 8, 2);
// Loop filtering
- highbd_mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit,
- thresh, bd);
+ vpx_highbd_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit,
+ thresh, bd);
src[0] = t_dst;
src[1] = t_dst + 8 * 8;
dst[0] = s - 8;
@@ -1205,8 +1178,8 @@
highbd_transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
// Loop filtering
- highbd_mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
- thresh, bd);
+ vpx_highbd_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit,
+ thresh, bd);
// Transpose back
highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
--- a/vpx_dsp/x86/loopfilter_avx2.c
+++ b/vpx_dsp/x86/loopfilter_avx2.c
@@ -13,9 +13,10 @@
#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
-static void mb_lpf_horizontal_edge_w_avx2_8(unsigned char *s, int p,
- const unsigned char *_blimit, const unsigned char *_limit,
-        const unsigned char *_thresh) {+void vpx_lpf_horizontal_edge_8_avx2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+                                    const unsigned char *_thresh) {__m128i mask, hev, flat, flat2;
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
@@ -400,9 +401,10 @@
8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128
};
-static void mb_lpf_horizontal_edge_w_avx2_16(unsigned char *s, int p,
- const unsigned char *_blimit, const unsigned char *_limit,
-        const unsigned char *_thresh) {+void vpx_lpf_horizontal_edge_16_avx2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+                                     const unsigned char *_thresh) {__m128i mask, hev, flat, flat2;
const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
@@ -974,13 +976,4 @@
q6 = _mm_or_si128(flat2_q6, q6);
_mm_storeu_si128((__m128i *) (s + 6 * p), q6);
}
-}
-
-void vpx_lpf_horizontal_16_avx2(unsigned char *s, int p,
- const unsigned char *_blimit, const unsigned char *_limit,
-        const unsigned char *_thresh, int count) {- if (count == 1)
- mb_lpf_horizontal_edge_w_avx2_8(s, p, _blimit, _limit, _thresh);
- else
- mb_lpf_horizontal_edge_w_avx2_16(s, p, _blimit, _limit, _thresh);
}
--- a/vpx_dsp/x86/loopfilter_mmx.asm
+++ b/vpx_dsp/x86/loopfilter_mmx.asm
@@ -18,14 +18,13 @@
; int src_pixel_step,
; const char *blimit,
; const char *limit,
-; const char *thresh,
-; int count
+; const char *thresh
;)
global sym(vpx_lpf_horizontal_4_mmx) PRIVATE
sym(vpx_lpf_horizontal_4_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
@@ -39,8 +38,6 @@
mov rsi, arg(0) ;src_ptr
movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch?
- movsxd rcx, dword ptr arg(5) ;count
-.next8_h:
mov rdx, arg(3) ;limit
movq mm7, [rdx]
mov rdi, rsi ; rdi points to row +1 for indirect addressing
@@ -208,11 +205,6 @@
pxor mm7, [GLOBAL(t80)] ; unoffset
movq [rdi], mm7 ; write back
- add rsi,8
- neg rax
- dec rcx
- jnz .next8_h
-
add rsp, 32
pop rsp
; begin epilog
@@ -230,14 +222,13 @@
; int src_pixel_step,
; const char *blimit,
; const char *limit,
-; const char *thresh,
-; int count
+; const char *thresh
;)
global sym(vpx_lpf_vertical_4_mmx) PRIVATE
sym(vpx_lpf_vertical_4_mmx):
push rbp
mov rbp, rsp
- SHADOW_ARGS_TO_STACK 6
+ SHADOW_ARGS_TO_STACK 5
GET_GOT rbx
push rsi
push rdi
@@ -254,8 +245,6 @@
lea rsi, [rsi + rax*4 - 4]
- movsxd rcx, dword ptr arg(5) ;count
-.next8_v:
mov rdi, rsi ; rdi points to row +1 for indirect addressing
add rdi, rax
@@ -578,10 +567,6 @@
psrlq mm5, 32
movd [rdi+rax*2+2], mm5
-
- lea rsi, [rsi+rax*8]
- dec rcx
- jnz .next8_v
add rsp, 64
pop rsp
--- a/vpx_dsp/x86/loopfilter_sse2.c
+++ b/vpx_dsp/x86/loopfilter_sse2.c
@@ -18,11 +18,10 @@
return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
}
-static void mb_lpf_horizontal_edge_w_sse2_8(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
-                                            const unsigned char *_thresh) {+void vpx_lpf_horizontal_edge_8_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+                                    const unsigned char *_thresh) {const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -383,11 +382,10 @@
return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result);
}
-static void mb_lpf_horizontal_edge_w_sse2_16(unsigned char *s,
- int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
-                                             const unsigned char *_thresh) {+void vpx_lpf_horizontal_edge_16_sse2(unsigned char *s, int p,
+ const unsigned char *_blimit,
+ const unsigned char *_limit,
+                                     const unsigned char *_thresh) {const __m128i zero = _mm_set1_epi16(0);
const __m128i one = _mm_set1_epi8(1);
const __m128i blimit = _mm_load_si128((const __m128i *)_blimit);
@@ -716,21 +714,10 @@
}
}
-// TODO(yunqingwang): remove count and call these 2 functions(8 or 16) directly.
-void vpx_lpf_horizontal_16_sse2(unsigned char *s, int p,
- const unsigned char *_blimit,
- const unsigned char *_limit,
-                                const unsigned char *_thresh, int count) {- if (count == 1)
- mb_lpf_horizontal_edge_w_sse2_8(s, p, _blimit, _limit, _thresh);
- else
- mb_lpf_horizontal_edge_w_sse2_16(s, p, _blimit, _limit, _thresh);
-}
-
void vpx_lpf_horizontal_8_sse2(unsigned char *s, int p,
const unsigned char *_blimit,
const unsigned char *_limit,
-                               const unsigned char *_thresh, int count) {+                               const unsigned char *_thresh) {DECLARE_ALIGNED(16, unsigned char, flat_op2[16]);
DECLARE_ALIGNED(16, unsigned char, flat_op1[16]);
DECLARE_ALIGNED(16, unsigned char, flat_op0[16]);
@@ -745,8 +732,6 @@
__m128i p3, p2, p1, p0, q0, q1, q2, q3;
__m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0;
- (void)count;
-
q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * p)),
_mm_loadl_epi64((__m128i *)(s + 3 * p)));
q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * p)),
@@ -1492,11 +1477,10 @@
void vpx_lpf_vertical_8_sse2(unsigned char *s, int p,
const unsigned char *blimit,
const unsigned char *limit,
-                             const unsigned char *thresh, int count) {+                             const unsigned char *thresh) {DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]);
unsigned char *src[1];
unsigned char *dst[1];
- (void)count;
// Transpose 8x8
src[0] = s - 4;
@@ -1505,7 +1489,7 @@
transpose(src, p, dst, 8, 1);
// Loop filtering
- vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, 1);
+ vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh);
src[0] = t_dst;
dst[0] = s - 4;
@@ -1557,7 +1541,7 @@
transpose(src, p, dst, 8, 2);
// Loop filtering
- mb_lpf_horizontal_edge_w_sse2_8(t_dst + 8 * 8, 8, blimit, limit, thresh);
+ vpx_lpf_horizontal_edge_8_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh);
src[0] = t_dst;
src[1] = t_dst + 8 * 8;
@@ -1578,8 +1562,7 @@
transpose8x16(s, s + 8 * p, p, t_dst + 8 * 16, 16);
// Loop filtering
- mb_lpf_horizontal_edge_w_sse2_16(t_dst + 8 * 16, 16, blimit, limit,
- thresh);
+ vpx_lpf_horizontal_edge_16_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh);
// Transpose back
transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, p);
--
⑨