shithub: libvpx

--- a/test/vp9_quantize_test.cc

+++ b/test/vp9_quantize_test.cc

@@ -77,7 +77,12 @@

         coeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 16)),

         qcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)),

         dqcoeff_(Buffer<tran_low_t>(max_size_, max_size_, 0, 32)) {

+    // TODO(jianj): SSSE3/AVX2 fail on extreme values; only NEON tests them.

+#if HAVE_NEON

+    max_value_ = (1 << (7 + bit_depth_)) - 1;

+#else

     max_value_ = (1 << bit_depth_) - 1;

+#endif

     zbin_ptr_ =

         reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));

     round_fp_ptr_ = reinterpret_cast<int16_t *>(

--- a/vp9/encoder/arm/neon/vp9_quantize_neon.c

+++ b/vp9/encoder/arm/neon/vp9_quantize_neon.c

@@ -55,7 +55,8 @@

     const int16x8_t v_iscan = vld1q_s16(&iscan[0]);

     const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr);

     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);

-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);

+    const int16x8_t v_abs = vabsq_s16(v_coeff);

+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);

     const int32x4_t v_tmp_lo =

         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));

     const int32x4_t v_tmp_hi =

@@ -80,7 +81,8 @@

     const int16x8_t v_iscan = vld1q_s16(&iscan[i]);

     const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i);

     const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15);

-    const int16x8_t v_tmp = vabaq_s16(v_round, v_coeff, v_zero);

+    const int16x8_t v_abs = vabsq_s16(v_coeff);

+    const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round);

     const int32x4_t v_tmp_lo =

         vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant));

     const int32x4_t v_tmp_hi =

@@ -146,7 +148,7 @@

   const int16x8_t dequant_mask =

       vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));

-  int16x8_t qcoeff = vaddq_s16(coeff_abs, round);

+  int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);

   int32x4_t dqcoeff_0, dqcoeff_1;

   int16x8_t dqcoeff;

   uint16x8_t eob_max;

@@ -200,7 +202,7 @@

       const int16x8_t dequant_mask =

           vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh));

-      int16x8_t qcoeff = vaddq_s16(coeff_abs, round);

+      int16x8_t qcoeff = vqaddq_s16(coeff_abs, round);

       int32x4_t dqcoeff_0, dqcoeff_1;

       int16x8_t dqcoeff;

--

⑨