ref: d331e7a1c0c59d4055a3bfacd051268ec0832b48
parent: d49a1a5329ea43968faaf295f7da5f72b28f971e
	author: Linfeng Zhang <linfengz@google.com>
	date: Mon Aug 28 06:35:43 EDT 2017
	
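Remove get_filter_base() and get_filter_offset() from the convolve functions. They now take the InterpKernel table together with explicit x0_q4/y0_q4 kernel indices, so they no longer depend on the alignment of the filter table. Change-Id: Ieab132a30d72c6e75bbe9473544fbe2cf51541ee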
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -33,9 +33,9 @@
typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
- int w, int h);
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+ int h);
typedef void (*WrapperFilterBlock2d8Func)(
const uint8_t *src_ptr, const unsigned int src_stride,
@@ -550,7 +550,7 @@
vpx_usec_timer_start(&timer);
   for (int n = 0; n < kNumTests; ++n) {- UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
+ UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
width, height);
}
vpx_usec_timer_mark(&timer);
@@ -570,7 +570,7 @@
vpx_usec_timer_start(&timer);
   for (int n = 0; n < kNumTests; ++n) {- UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
+ UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0,
width, height);
}
vpx_usec_timer_mark(&timer);
@@ -585,7 +585,7 @@
uint8_t *const out = output();
ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
- NULL, 0, NULL, 0, Width(), Height()));
+ NULL, 0, 0, 0, 0, Width(), Height()));
CheckGuardBlocks();
@@ -604,7 +604,7 @@
CopyOutputToRef();
ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
- NULL, 0, NULL, 0, Width(), Height()));
+ NULL, 0, 0, 0, 0, Width(), Height()));
CheckGuardBlocks();
@@ -621,12 +621,10 @@
 TEST_P(ConvolveTest, CopyHoriz) {uint8_t *const in = input();
uint8_t *const out = output();
- DECLARE_ALIGNED(256, const int16_t,
-                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
- filter8, 16, filter8, 16, Width(),
- Height()));
+ vp9_filter_kernels[0], 0, 16, 0, 16,
+ Width(), Height()));
CheckGuardBlocks();
@@ -641,12 +639,10 @@
 TEST_P(ConvolveTest, CopyVert) {uint8_t *const in = input();
uint8_t *const out = output();
- DECLARE_ALIGNED(256, const int16_t,
-                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
- filter8, 16, filter8, 16, Width(),
- Height()));
+ vp9_filter_kernels[0], 0, 16, 0, 16,
+ Width(), Height()));
CheckGuardBlocks();
@@ -661,12 +657,10 @@
 TEST_P(ConvolveTest, Copy2D) {uint8_t *const in = input();
uint8_t *const out = output();
- DECLARE_ALIGNED(256, const int16_t,
-                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
- filter8, 16, filter8, 16, Width(),
- Height()));
+ vp9_filter_kernels[0], 0, 16, 0, 16,
+ Width(), Height()));
CheckGuardBlocks();
@@ -702,7 +696,6 @@
}
}
-const int16_t kInvalidFilter[8] = { 0 }; const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
};
@@ -755,21 +748,21 @@
Width(), Height(), UUT_->use_highbd_);
if (filter_x && filter_y)
- ASM_REGISTER_STATE_CHECK(UUT_->hv8_[i](
- in, kInputStride, out, kOutputStride, filters[filter_x], 16,
- filters[filter_y], 16, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->hv8_[i](in, kInputStride, out, kOutputStride, filters,
+ filter_x, 16, filter_y, 16, Width(), Height()));
else if (filter_y)
- ASM_REGISTER_STATE_CHECK(UUT_->v8_[i](
- in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
- filters[filter_y], 16, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->v8_[i](in, kInputStride, out, kOutputStride, filters, 0,
+ 16, filter_y, 16, Width(), Height()));
else if (filter_x)
- ASM_REGISTER_STATE_CHECK(UUT_->h8_[i](
- in, kInputStride, out, kOutputStride, filters[filter_x], 16,
- kInvalidFilter, 16, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->h8_[i](in, kInputStride, out, kOutputStride, filters,
+ filter_x, 16, 0, 16, Width(), Height()));
else
- ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](
- in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
- kInvalidFilter, 0, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](in, kInputStride, out,
+ kOutputStride, NULL, 0, 0,
+ 0, 0, Width(), Height()));
CheckGuardBlocks();
@@ -853,21 +846,21 @@
filters[filter_y], ref, kOutputStride,
Width(), Height(), UUT_->use_highbd_);
if (filter_x && filter_y)
- ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0](
- in, kInputStride, out, kOutputStride, filters[filter_x], 16,
- filters[filter_y], 16, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->hv8_[0](in, kInputStride, out, kOutputStride, filters,
+ filter_x, 16, filter_y, 16, Width(), Height()));
else if (filter_y)
- ASM_REGISTER_STATE_CHECK(UUT_->v8_[0](
- in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
- filters[filter_y], 16, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->v8_[0](in, kInputStride, out, kOutputStride, filters, 0,
+ 16, filter_y, 16, Width(), Height()));
else if (filter_x)
- ASM_REGISTER_STATE_CHECK(UUT_->h8_[0](
- in, kInputStride, out, kOutputStride, filters[filter_x], 16,
- kInvalidFilter, 16, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(
+ UUT_->h8_[0](in, kInputStride, out, kOutputStride, filters,
+ filter_x, 16, 0, 16, Width(), Height()));
else
- ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](
- in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
- kInvalidFilter, 0, Width(), Height()));
+ ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out,
+ kOutputStride, NULL, 0, 0,
+ 0, 0, Width(), Height()));
             for (int y = 0; y < Height(); ++y) {for (int x = 0; x < Width(); ++x)
@@ -897,8 +890,8 @@
     for (int step = 1; step <= 32; ++step) {/* Test the horizontal and vertical filters in combination. */
ASM_REGISTER_STATE_CHECK(
- UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap[frac],
- step, eighttap[frac], step, Width(), Height()));
+ UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap, frac,
+ step, frac, step, Width(), Height()));
CheckGuardBlocks();
@@ -917,14 +910,14 @@
using std::tr1::make_tuple;
#if CONFIG_VP9_HIGHBITDEPTH
-#define WRAP(func, bd) \
- void wrap_##func##_##bd( \
- const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int filter_x_stride, \
-      const int16_t *filter_y, int filter_y_stride, int w, int h) {            \- vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride, \
- reinterpret_cast<uint16_t *>(dst), dst_stride, filter_x, \
- filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
+#define WRAP(func, bd) \
+ void wrap_##func##_##bd( \
+ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
+ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
+      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \+ vpx_highbd_##func(reinterpret_cast<const uint16_t *>(src), src_stride, \
+ reinterpret_cast<uint16_t *>(dst), dst_stride, filter, \
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \
}
#if HAVE_SSE2 && ARCH_X86_64
--- a/vp9/common/vp9_reconinter.h
+++ b/vp9/common/vp9_reconinter.h
@@ -26,9 +26,9 @@
const struct scale_factors *sf, int w, int h,
int ref, const InterpKernel *kernel, int xs,
                                    int ys) {- sf->predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y],
- ys, w, h);
+ sf->predict[subpel_x != 0][subpel_y != 0][ref](src, src_stride, dst,
+ dst_stride, kernel, subpel_x,
+ xs, subpel_y, ys, w, h);
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -37,8 +37,8 @@
const int subpel_x, const int subpel_y, const struct scale_factors *sf,
     int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) {sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref](
- src, src_stride, dst, dst_stride, kernel[subpel_x], xs, kernel[subpel_y],
- ys, w, h, bd);
+ src, src_stride, dst, dst_stride, kernel, subpel_x, xs, subpel_y, ys, w,
+ h, bd);
}
#endif // CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -390,12 +390,12 @@
}
   if (decision == FILTER_BLOCK) {- vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, NULL, 0,
- NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
+ vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, NULL, 0, 0,
+ 0, 0, num_4x4_blocks_wide_lookup[bs] << 2,
num_4x4_blocks_high_lookup[bs] << 2);
   } else {  // COPY_BLOCK- vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, NULL, 0,
- NULL, 0, num_4x4_blocks_wide_lookup[bs] << 2,
+ vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, NULL, 0, 0,
+ 0, 0, num_4x4_blocks_wide_lookup[bs] << 2,
num_4x4_blocks_high_lookup[bs] << 2);
}
*denoiser_decision = decision;
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2645,15 +2645,14 @@
         if (src->flags & YV12_FLAG_HIGHBITDEPTH) {vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
- CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
- kernel[x_q4 & 0xf], 16 * src_w / dst_w,
- kernel[y_q4 & 0xf], 16 * src_h / dst_h,
- 16 / factor, 16 / factor, bd);
+ CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, kernel,
+ x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
+ 16 * src_h / dst_h, 16 / factor, 16 / factor,
+ bd);
         } else {- vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
- kernel[x_q4 & 0xf], 16 * src_w / dst_w,
- kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor,
- 16 / factor);
+ vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
+ x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
+ 16 * src_h / dst_h, 16 / factor, 16 / factor);
}
}
}
--- a/vp9/encoder/vp9_frame_scale.c
+++ b/vp9/encoder/vp9_frame_scale.c
@@ -43,10 +43,9 @@
(x / factor) * src_w / dst_w;
uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);
- vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride,
- kernel[x_q4 & 0xf], 16 * src_w / dst_w,
- kernel[y_q4 & 0xf], 16 * src_h / dst_h, 16 / factor,
- 16 / factor);
+ vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
+ x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
+ 16 * src_h / dst_h, 16 / factor, 16 / factor);
}
}
}
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -2162,15 +2162,15 @@
vpx_highbd_convolve_copy(
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
CONVERT_TO_SHORTPTR(this_mode_pred->data), this_mode_pred->stride,
- NULL, 0, NULL, 0, bw, bh, xd->bd);
+ NULL, 0, 0, 0, 0, bw, bh, xd->bd);
else
vpx_convolve_copy(best_pred->data, best_pred->stride,
this_mode_pred->data, this_mode_pred->stride, NULL,
- 0, NULL, 0, bw, bh);
+ 0, 0, 0, 0, bw, bh);
#else
vpx_convolve_copy(best_pred->data, best_pred->stride,
this_mode_pred->data, this_mode_pred->stride, NULL, 0,
- NULL, 0, bw, bh);
+ 0, 0, 0, bw, bh);
#endif // CONFIG_VP9_HIGHBITDEPTH
best_pred = this_mode_pred;
}
@@ -2264,14 +2264,14 @@
if (cm->use_highbitdepth)
vpx_highbd_convolve_copy(
CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
- CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, NULL, 0, NULL, 0,
+ CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, NULL, 0, 0, 0, 0,
bw, bh, xd->bd);
else
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
- pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
+ pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
#else
vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
- pd->dst.stride, NULL, 0, NULL, 0, bw, bh);
+ pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
#endif // CONFIG_VP9_HIGHBITDEPTH
}
}
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -600,7 +600,7 @@
#if CONFIG_VP9_HIGHBITDEPTH
       if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16,
- 32, NULL, 0, NULL, 0, bs, bs, xd->bd);
+ 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd);
         if (xd->lossless) {vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, *eob, xd->bd);
         } else {@@ -623,7 +623,7 @@
recon = CONVERT_TO_BYTEPTR(recon16);
       } else {#endif // CONFIG_VP9_HIGHBITDEPTH
- vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, NULL, 0, bs, bs);
+ vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs);
         switch (tx_size) {case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, *eob); break;
case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, *eob); break;
--- a/vpx_dsp/arm/highbd_vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve8_neon.c
@@ -137,15 +137,14 @@
void vpx_highbd_convolve8_horiz_neon(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, // unused
- int y_step_q4, // unused
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                      int w, int h, int bd) {   if (x_step_q4 != 16) {- vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h, bd);
+ vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);
   } else {- const int16x8_t filters = vld1q_s16(filter_x);
+ const int16x8_t filters = vld1q_s16(filter[x0_q4]);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
uint16x8_t t0, t1, t2, t3;
@@ -337,15 +336,15 @@
void vpx_highbd_convolve8_avg_horiz_neon(const uint16_t *src,
ptrdiff_t src_stride, uint16_t *dst,
ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, // unused
- int y_step_q4, // unused
-                                         int w, int h, int bd) {+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4,
+                                         int y_step_q4, int w, int h, int bd) {   if (x_step_q4 != 16) {- vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h, bd);
+ vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,
+ bd);
   } else {- const int16x8_t filters = vld1q_s16(filter_x);
+ const int16x8_t filters = vld1q_s16(filter[x0_q4]);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
uint16x8_t t0, t1, t2, t3;
@@ -566,15 +565,14 @@
void vpx_highbd_convolve8_vert_neon(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, // unused
- int x_step_q4, // unused
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                     int w, int h, int bd) {   if (y_step_q4 != 16) {- vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h, bd);
+ vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h, bd);
   } else {- const int16x8_t filters = vld1q_s16(filter_y);
+ const int16x8_t filters = vld1q_s16(filter[y0_q4]);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
assert(!((intptr_t)dst & 3));
@@ -732,15 +730,15 @@
void vpx_highbd_convolve8_avg_vert_neon(const uint16_t *src,
ptrdiff_t src_stride, uint16_t *dst,
ptrdiff_t dst_stride,
- const int16_t *filter_x, // unused
- int x_step_q4, // unused
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                         int w, int h, int bd) {   if (y_step_q4 != 16) {- vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h, bd);
+ vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,
+ bd);
   } else {- const int16x8_t filters = vld1q_s16(filter_y);
+ const int16x8_t filters = vld1q_s16(filter[y0_q4]);
const uint16x8_t max = vdupq_n_u16((1 << bd) - 1);
assert(!((intptr_t)dst & 3));
--- a/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c
@@ -15,13 +15,14 @@
void vpx_highbd_convolve_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                   int w, int h, int bd) {- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
(void)bd;
   if (w < 8) {  // avg4--- a/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c
@@ -15,13 +15,14 @@
void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                    int w, int h, int bd) {- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
(void)bd;
   if (w < 8) {  // copy4--- a/vpx_dsp/arm/highbd_vpx_convolve_neon.c
+++ b/vpx_dsp/arm/highbd_vpx_convolve_neon.c
@@ -15,10 +15,9 @@
void vpx_highbd_convolve8_neon(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                int h, int bd) {- const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
// + 1 to make it divisible by 4
uint16_t temp[64 * 136];
const int intermediate_height =
@@ -29,20 +28,19 @@
* buffer which has lots of extra room and is subsequently discarded this is
* safe if somewhat less than ideal. */
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
- filter_x, x_step_q4, filter_y, y_step_q4, w,
+ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w,
intermediate_height, bd);
/* Step into the temp buffer 3 lines to get the actual frame data */
- vpx_highbd_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h, bd);
+ vpx_highbd_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_avg_neon(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                    int w, int h, int bd) {- const int y0_q4 = get_filter_offset(filter_y, get_filter_base(filter_y));
// + 1 to make it divisible by 4
uint16_t temp[64 * 136];
const int intermediate_height =
@@ -52,8 +50,9 @@
* to average the values after both passes.
*/
vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w,
- filter_x, x_step_q4, filter_y, y_step_q4, w,
+ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w,
intermediate_height, bd);
- vpx_highbd_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h, bd);
+ vpx_highbd_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h,
+ bd);
}
--- a/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve8_avg_neon_asm.asm
@@ -42,10 +42,11 @@
; r1 int src_stride
; r2 uint8_t *dst
; r3 int dst_stride
-; sp[]const int16_t *filter_x
-; sp[]int x_step_q4
-; sp[]const int16_t *filter_y ; unused
-; sp[]int y_step_q4 ; unused
+; sp[]const int16_t *filter
+; sp[]int x0_q4
+; sp[]int x_step_q4 ; unused
+; sp[]int y0_q4
+; sp[]int y_step_q4 ; unused
; sp[]int w
; sp[]int h
@@ -54,11 +55,11 @@
sub r0, r0, #3 ; adjust for taps
- ldr r5, [sp, #32] ; filter_x
- ldr r6, [sp, #48] ; w
- ldr r7, [sp, #52] ; h
+ ldrd r4, r5, [sp, #32] ; filter, x0_q4
+ add r4, r5, lsl #4
+ ldrd r6, r7, [sp, #52] ; w, h
-    vld1.s16        {q0}, [r5]              ; filter_x+    vld1.s16        {q0}, [r4]              ; filtersub r8, r1, r1, lsl #2 ; -src_stride * 3
add r8, r8, #4 ; -src_stride * 3 + 4
@@ -127,7 +128,7 @@
sub r2, r2, r3, lsl #2 ; reset for store
- ; src[] * filter_x
+ ; src[] * filter
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
@@ -184,11 +185,13 @@
sub r0, r0, r1
sub r0, r0, r1, lsl #1
- ldr r4, [sp, #32] ; filter_y
- ldr r6, [sp, #40] ; w
- ldr lr, [sp, #44] ; h
+ ldr r4, [sp, #24] ; filter
+ ldr r5, [sp, #36] ; y0_q4
+ add r4, r5, lsl #4
+ ldr r6, [sp, #44] ; w
+ ldr lr, [sp, #48] ; h
-    vld1.s16        {q0}, [r4]              ; filter_y+    vld1.s16        {q0}, [r4]              ; filterlsl r1, r1, #1
lsl r3, r3, #1
@@ -232,7 +235,7 @@
pld [r7]
pld [r4]
- ; src[] * filter_y
+ ; src[] * filter
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
pld [r7, r1]
--- a/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -125,11 +125,10 @@
void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, // unused
- int y_step_q4, // unused
-                              int w, int h) {- const int16x8_t filters = vld1q_s16(filter_x);
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+                              int h) {+ const int16x8_t filters = vld1q_s16(filter[x0_q4]);
uint8x8_t t0, t1, t2, t3;
assert(!((intptr_t)dst & 3));
@@ -137,8 +136,8 @@
assert(x_step_q4 == 16);
(void)x_step_q4;
+ (void)y0_q4;
(void)y_step_q4;
- (void)filter_y;
src -= 3;
@@ -390,11 +389,10 @@
void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, // unused
- int y_step_q4, // unused
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                   int w, int h) {- const int16x8_t filters = vld1q_s16(filter_x);
+ const int16x8_t filters = vld1q_s16(filter[x0_q4]);
uint8x8_t t0, t1, t2, t3;
assert(!((intptr_t)dst & 3));
@@ -402,8 +400,8 @@
assert(x_step_q4 == 16);
(void)x_step_q4;
+ (void)y0_q4;
(void)y_step_q4;
- (void)filter_y;
src -= 3;
@@ -692,19 +690,18 @@
void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, // unused
- int x_step_q4, // unused
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                              int h) {- const int16x8_t filters = vld1q_s16(filter_y);
+ const int16x8_t filters = vld1q_s16(filter[y0_q4]);
assert(!((intptr_t)dst & 3));
assert(!(dst_stride & 3));
assert(y_step_q4 == 16);
+ (void)x0_q4;
(void)x_step_q4;
(void)y_step_q4;
- (void)filter_x;
src -= 3 * src_stride;
@@ -864,19 +861,18 @@
void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, // unused
- int x_step_q4, // unused
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                  int h) {- const int16x8_t filters = vld1q_s16(filter_y);
+ const int16x8_t filters = vld1q_s16(filter[y0_q4]);
assert(!((intptr_t)dst & 3));
assert(!(dst_stride & 3));
assert(y_step_q4 == 16);
+ (void)x0_q4;
(void)x_step_q4;
(void)y_step_q4;
- (void)filter_x;
src -= 3 * src_stride;
--- a/vpx_dsp/arm/vpx_convolve8_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve8_neon_asm.asm
@@ -42,10 +42,11 @@
; r1 int src_stride
; r2 uint8_t *dst
; r3 int dst_stride
-; sp[]const int16_t *filter_x
-; sp[]int x_step_q4
-; sp[]const int16_t *filter_y ; unused
-; sp[]int y_step_q4 ; unused
+; sp[]const int16_t *filter
+; sp[]int x0_q4
+; sp[]int x_step_q4 ; unused
+; sp[]int y0_q4
+; sp[]int y_step_q4 ; unused
; sp[]int w
; sp[]int h
@@ -54,11 +55,11 @@
sub r0, r0, #3 ; adjust for taps
- ldr r5, [sp, #32] ; filter_x
- ldr r6, [sp, #48] ; w
- ldr r7, [sp, #52] ; h
+ ldrd r4, r5, [sp, #32] ; filter, x0_q4
+ add r4, r5, lsl #4
+ ldrd r6, r7, [sp, #52] ; w, h
-    vld1.s16        {q0}, [r5]              ; filter_x+    vld1.s16        {q0}, [r4]              ; filtersub r8, r1, r1, lsl #2 ; -src_stride * 3
add r8, r8, #4 ; -src_stride * 3 + 4
@@ -119,7 +120,7 @@
pld [r5, r1, lsl #1]
- ; src[] * filter_x
+ ; src[] * filter
MULTIPLY_BY_Q0 q1, d16, d17, d20, d22, d18, d19, d23, d24
MULTIPLY_BY_Q0 q2, d17, d20, d22, d18, d19, d23, d24, d26
MULTIPLY_BY_Q0 q14, d20, d22, d18, d19, d23, d24, d26, d27
@@ -173,11 +174,13 @@
sub r0, r0, r1
sub r0, r0, r1, lsl #1
- ldr r4, [sp, #32] ; filter_y
- ldr r6, [sp, #40] ; w
- ldr lr, [sp, #44] ; h
+ ldr r4, [sp, #24] ; filter
+ ldr r5, [sp, #36] ; y0_q4
+ add r4, r5, lsl #4
+ ldr r6, [sp, #44] ; w
+ ldr lr, [sp, #48] ; h
-    vld1.s16        {q0}, [r4]              ; filter_y+    vld1.s16        {q0}, [r4]              ; filterlsl r1, r1, #1
lsl r3, r3, #1
@@ -216,7 +219,7 @@
pld [r5]
pld [r8]
- ; src[] * filter_y
+ ; src[] * filter
MULTIPLY_BY_Q0 q1, d16, d17, d18, d19, d20, d21, d22, d24
pld [r5, r3]
--- a/vpx_dsp/arm/vpx_convolve_avg_neon.c
+++ b/vpx_dsp/arm/vpx_convolve_avg_neon.c
@@ -15,13 +15,13 @@
void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride, int w,
-                           int h) {- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                           int y0_q4, int y_step_q4, int w, int h) {+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
   if (w < 8) {  // avg4uint8x8_t s0, s1;
--- a/vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm
@@ -17,7 +17,7 @@
|vpx_convolve_avg_neon| PROC
     push                {r4-r6, lr}- ldrd r4, r5, [sp, #32]
+ ldrd r4, r5, [sp, #36]
mov r6, r2
cmp r4, #32
--- a/vpx_dsp/arm/vpx_convolve_copy_neon.c
+++ b/vpx_dsp/arm/vpx_convolve_copy_neon.c
@@ -15,13 +15,14 @@
void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h) {- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
   if (w < 8) {  // copy4     do {--- a/vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm
+++ b/vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm
@@ -17,7 +17,7 @@
|vpx_convolve_copy_neon| PROC
     push                {r4-r5, lr}- ldrd r4, r5, [sp, #28]
+ ldrd r4, r5, [sp, #32]
cmp r4, #32
bgt copy64
--- a/vpx_dsp/arm/vpx_convolve_neon.c
+++ b/vpx_dsp/arm/vpx_convolve_neon.c
@@ -15,8 +15,8 @@
#include "vpx_ports/mem.h"
void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                         int w, int h) {/* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
* maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
@@ -33,19 +33,19 @@
* height and filter a multiple of 4 lines. Since this goes in to the temp
* buffer which has lots of extra room and is subsequently discarded this is
* safe if somewhat less than ideal. */
- vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter_x,
- x_step_q4, filter_y, y_step_q4, w,
+ vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w,
intermediate_height);
/* Step into the temp buffer 3 lines to get the actual frame data */
- vpx_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h) {uint8_t temp[64 * 72];
const int intermediate_height = h + 7;
@@ -56,9 +56,9 @@
/* This implementation has the same issues as above. In addition, we only want
* to average the values after both passes.
*/
- vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter_x,
- x_step_q4, filter_y, y_step_q4, w,
+ vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w,
intermediate_height);
- vpx_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
--- a/vpx_dsp/mips/convolve2_avg_dspr2.c
+++ b/vpx_dsp/mips/convolve2_avg_dspr2.c
@@ -219,9 +219,10 @@
void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                                  int h) {+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+                                  int w, int h) {+ const int16_t *const filter_y = filter[y0_q4];
uint32_t pos = 38;
assert(y_step_q4 == 16);
@@ -247,8 +248,8 @@
h);
break;
default:
- vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/convolve2_avg_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve2_avg_horiz_dspr2.c
@@ -751,9 +751,10 @@
void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
                                    int w, int h) {+ const int16_t *const filter_x = filter[x0_q4];
uint32_t pos = 38;
assert(x_step_q4 == 16);
@@ -793,8 +794,8 @@
h);
break;
default:
- vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/convolve2_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve2_horiz_dspr2.c
@@ -628,9 +628,10 @@
void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                               int h) {+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+                               int w, int h) {+ const int16_t *const filter_x = filter[x0_q4];
uint32_t pos = 38;
assert(x_step_q4 == 16);
@@ -672,8 +673,8 @@
(int32_t)dst_stride, filter_x, (int32_t)h);
break;
default:
- vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/convolve2_vert_dspr2.c
+++ b/vpx_dsp/mips/convolve2_vert_dspr2.c
@@ -201,9 +201,10 @@
void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                              int h) {+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+                              int w, int h) {+ const int16_t *const filter_y = filter[y0_q4];
uint32_t pos = 38;
assert(y_step_q4 == 16);
@@ -228,8 +229,8 @@
convolve_bi_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
break;
default:
- vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/convolve8_avg_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_dspr2.c
@@ -334,15 +334,16 @@
void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                                  int h) {+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+                                  int w, int h) {+ const int16_t *const filter_y = filter[y0_q4];
assert(y_step_q4 == 16);
assert(((const int32_t *)filter_y)[1] != 0x800000);
   if (((const int32_t *)filter_y)[0] == 0) {- vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
   } else {uint32_t pos = 38;
@@ -367,8 +368,8 @@
h);
break;
default:
- vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
@@ -376,8 +377,8 @@
void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4, int w,
                              int h) {/* Fixed size intermediate buffer places limits on parameters. */
DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]);
@@ -390,24 +391,26 @@
if (intermediate_height < h) intermediate_height = h;
- vpx_convolve8_horiz(src - (src_stride * 3), src_stride, temp, 64, filter_x,
- x_step_q4, filter_y, y_step_q4, w, intermediate_height);
+ vpx_convolve8_horiz(src - (src_stride * 3), src_stride, temp, 64, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w,
+ intermediate_height);
- vpx_convolve8_avg_vert(temp + 64 * 3, 64, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_vert(temp + 64 * 3, 64, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
void vpx_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride, int w,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h) {int x, y;
uint32_t tp1, tp2, tn1, tp3, tp4, tn2;
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
/* prefetch data to cache memory */
prefetch_load(src);
--- a/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c
@@ -938,15 +938,16 @@
void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
                                    int w, int h) {+ const int16_t *const filter_x = filter[x0_q4];
assert(x_step_q4 == 16);
assert(((const int32_t *)filter_x)[1] != 0x800000);
   if (((const int32_t *)filter_x)[0] == 0) {- vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
   } else {uint32_t pos = 38;
@@ -987,9 +988,8 @@
h);
break;
default:
- vpx_convolve8_avg_horiz_c(src + 3, src_stride, dst, dst_stride,
- filter_x, x_step_q4, filter_y, y_step_q4, w,
- h);
+ vpx_convolve8_avg_horiz_c(src + 3, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/convolve8_dspr2.c
+++ b/vpx_dsp/mips/convolve8_dspr2.c
@@ -1296,9 +1296,11 @@
}
void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4,
                          int w, int h) {+ const int16_t *const filter_x = filter[x0_q4];
+ const int16_t *const filter_y = filter[y0_q4];
DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]);
int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7;
uint32_t pos = 38;
@@ -1395,14 +1397,15 @@
void vpx_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
-                             int w, int h) {+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+                             int h) {int x, y;
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
/* prefetch data to cache memory */
prefetch_load(src);
--- a/vpx_dsp/mips/convolve8_horiz_dspr2.c
+++ b/vpx_dsp/mips/convolve8_horiz_dspr2.c
@@ -818,15 +818,16 @@
void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                int h) {+ const int16_t *const filter_x = filter[x0_q4];
assert(x_step_q4 == 16);
assert(((const int32_t *)filter_x)[1] != 0x800000);
   if (((const int32_t *)filter_x)[0] == 0) {- vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
   } else {uint32_t pos = 38;
@@ -868,8 +869,8 @@
(int32_t)dst_stride, filter_x, (int32_t)h);
break;
default:
- vpx_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/convolve8_vert_dspr2.c
+++ b/vpx_dsp/mips/convolve8_vert_dspr2.c
@@ -318,15 +318,16 @@
void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                               int h) {+ const int16_t *const filter_y = filter[y0_q4];
assert(y_step_q4 == 16);
assert(((const int32_t *)filter_y)[1] != 0x800000);
   if (((const int32_t *)filter_y)[0] == 0) {- vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
   } else {uint32_t pos = 38;
@@ -349,8 +350,8 @@
convolve_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
break;
default:
- vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/convolve_common_dspr2.h
+++ b/vpx_dsp/mips/convolve_common_dspr2.h
@@ -24,21 +24,21 @@
#if HAVE_DSPR2
void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h);
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+ int w, int h);
void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
int w, int h);
void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h);
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+ int w, int h);
void vpx_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
ptrdiff_t dst_stride, const int16_t *filter, int w,
@@ -46,9 +46,9 @@
void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
- int h);
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4,
+ int w, int h);
#endif // #if HAVE_DSPR2
#ifdef __cplusplus
--- a/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c
@@ -633,9 +633,10 @@
void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                  int h) {+ const int16_t *const filter_x = filter[x0_q4];
int8_t cnt, filt_hor[8];
assert(x_step_q4 == 16);
@@ -668,8 +669,8 @@
(int32_t)dst_stride, &filt_hor[3], h);
break;
default:
- vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
   } else {@@ -695,8 +696,8 @@
(int32_t)dst_stride, filt_hor, h);
break;
default:
- vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/vpx_convolve8_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_msa.c
@@ -516,9 +516,10 @@
void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                           int h) {+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                           int y0_q4, int y_step_q4, int w, int h) {+ const int16_t *const filter_x = filter[x0_q4];
+ const int16_t *const filter_y = filter[y0_q4];
int8_t cnt, filt_hor[8], filt_ver[8];
assert(x_step_q4 == 16);
@@ -560,14 +561,14 @@
&filt_hor[3], &filt_ver[3], h);
break;
default:
- vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
} else if (((const int32_t *)filter_x)[0] == 0 ||
              ((const int32_t *)filter_y)[0] == 0) {- vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
   } else {     switch (w) {case 4:
@@ -596,8 +597,8 @@
filt_ver, h);
break;
default:
- vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
@@ -605,9 +605,10 @@
void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                 int h) {+ const int16_t *const filter_y = filter[y0_q4];
int8_t cnt, filt_ver[8];
assert(y_step_q4 == 16);
@@ -640,8 +641,8 @@
(int32_t)dst_stride, &filt_ver[3], h);
break;
default:
- vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
   } else {@@ -668,8 +669,8 @@
(int32_t)dst_stride, filt_ver, h);
break;
default:
- vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter,
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
@@ -621,9 +621,10 @@
void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                              int h) {+ const int16_t *const filter_x = filter[x0_q4];
int8_t cnt, filt_hor[8];
assert(x_step_q4 == 16);
@@ -656,8 +657,8 @@
&filt_hor[3], h);
break;
default:
- vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
   } else {@@ -683,8 +684,8 @@
filt_hor, h);
break;
default:
- vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/vpx_convolve8_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_msa.c
@@ -541,9 +541,11 @@
}
void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int32_t x_step_q4, const int16_t *filter_y,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int32_t x_step_q4, int y0_q4,
                        int32_t y_step_q4, int32_t w, int32_t h) {+ const int16_t *const filter_x = filter[x0_q4];
+ const int16_t *const filter_y = filter[y0_q4];
int8_t cnt, filt_hor[8], filt_ver[8];
assert(x_step_q4 == 16);
@@ -585,14 +587,14 @@
&filt_ver[3], (int32_t)h);
break;
default:
- vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
} else if (((const int32_t *)filter_x)[0] == 0 ||
              ((const int32_t *)filter_y)[0] == 0) {- vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
+ y0_q4, y_step_q4, w, h);
   } else {     switch (w) {case 4:
@@ -621,8 +623,8 @@
(int32_t)h);
break;
default:
- vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/vpx_convolve8_vert_msa.c
+++ b/vpx_dsp/mips/vpx_convolve8_vert_msa.c
@@ -628,9 +628,10 @@
void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h) {+ const int16_t *const filter_y = filter[y0_q4];
int8_t cnt, filt_ver[8];
assert(y_step_q4 == 16);
@@ -663,8 +664,8 @@
&filt_ver[3], h);
break;
default:
- vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
   } else {@@ -690,8 +691,8 @@
filt_ver, h);
break;
default:
- vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
--- a/vpx_dsp/mips/vpx_convolve_avg_msa.c
+++ b/vpx_dsp/mips/vpx_convolve_avg_msa.c
@@ -189,13 +189,14 @@
void vpx_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int32_t filter_x_stride,
- const int16_t *filter_y, int32_t filter_y_stride,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int32_t y_step_q4,
                           int32_t w, int32_t h) {- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
   switch (w) {     case 4: {--- a/vpx_dsp/mips/vpx_convolve_copy_msa.c
+++ b/vpx_dsp/mips/vpx_convolve_copy_msa.c
@@ -199,13 +199,14 @@
void vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int32_t filter_x_stride,
- const int16_t *filter_y, int32_t filter_y_stride,
+ const InterpKernel *filter, int x0_q4,
+ int32_t x_step_q4, int y0_q4, int32_t y_step_q4,
                            int32_t w, int32_t h) {- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
   switch (w) {     case 4: {--- a/vpx_dsp/ppc/vpx_convolve_vsx.c
+++ b/vpx_dsp/ppc/vpx_convolve_vsx.c
@@ -53,13 +53,13 @@
void vpx_convolve_copy_vsx(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int32_t filter_x_stride,
- const int16_t *filter_y, int32_t filter_y_stride,
-                           int32_t w, int32_t h) {- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                           int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) {+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
   switch (w) {     case 16: {@@ -132,14 +132,8 @@
void vpx_convolve_avg_vsx(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int32_t filter_x_stride,
- const int16_t *filter_y, int32_t filter_y_stride,
-                          int32_t w, int32_t h) {- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
-
+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                          int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) {   switch (w) {     case 16: {avg_w16(src, src_stride, dst, dst_stride, h);
@@ -154,8 +148,8 @@
break;
}
     default: {- vpx_convolve_avg_c(src, src_stride, dst, dst_stride, filter_x,
- filter_x_stride, filter_y, filter_y_stride, w, h);
+ vpx_convolve_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
break;
}
}
@@ -299,9 +293,9 @@
static inline void convolve(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const InterpKernel *const x_filters, int x0_q4,
- int x_step_q4, const InterpKernel *const y_filters,
-                            int y0_q4, int y_step_q4, int w, int h) {+ const InterpKernel *const filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+                            int h) {// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
@@ -324,95 +318,77 @@
assert(x_step_q4 <= 32);
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
- x_filters, x0_q4, x_step_q4, w, intermediate_height);
- convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
- y_filters, y0_q4, y_step_q4, w, h);
+ filter, x0_q4, x_step_q4, w, intermediate_height);
+ convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter,
+ y0_q4, y_step_q4, w, h);
}
void vpx_convolve8_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                              int h) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- (void)filter_y;
+ (void)y0_q4;
(void)y_step_q4;
- convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
- w, h);
+ convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w,
+ h);
}
void vpx_convolve8_avg_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                  int h) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- (void)filter_y;
+ (void)y0_q4;
(void)y_step_q4;
- convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
- x_step_q4, w, h);
+ convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
+ w, h);
}
void vpx_convolve8_vert_vsx(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h) {- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- (void)filter_x;
+ (void)x0_q4;
(void)x_step_q4;
- convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
- w, h);
+ convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w,
+ h);
}
void vpx_convolve8_avg_vert_vsx(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                 int h) {- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- (void)filter_x;
+ (void)x0_q4;
(void)x_step_q4;
- convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
- y_step_q4, w, h);
+ convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4,
+ w, h);
}
void vpx_convolve8_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                        int w, int h) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
- filters_y, y0_q4, y_step_q4, w, h);
+ convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4,
+ y_step_q4, w, h);
}
void vpx_convolve8_avg_vsx(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                           int h) {+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                           int y0_q4, int y_step_q4, int w, int h) {// Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
assert(w <= 64);
assert(h <= 64);
- vpx_convolve8_vsx(src, src_stride, temp, 64, filter_x, x_step_q4, filter_y,
+ vpx_convolve8_vsx(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4,
y_step_q4, w, h);
- vpx_convolve_avg_vsx(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
+ vpx_convolve_avg_vsx(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
}
--- a/vpx_dsp/vpx_convolve.c
+++ b/vpx_dsp/vpx_convolve.c
@@ -114,10 +114,9 @@
}
static void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const InterpKernel *const x_filters,
- int x0_q4, int x_step_q4,
- const InterpKernel *const y_filters, int y0_q4,
-                     int y_step_q4, int w, int h) {+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
+                     int h) {// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
@@ -140,87 +139,64 @@
assert(x_step_q4 <= 32);
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
- x_filters, x0_q4, x_step_q4, w, intermediate_height);
- convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
- y_filters, y0_q4, y_step_q4, w, h);
+ filter, x0_q4, x_step_q4, w, intermediate_height);
+ convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter,
+ y0_q4, y_step_q4, w, h);
}
void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                           int h) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- (void)filter_y;
+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                           int y0_q4, int y_step_q4, int w, int h) {+ (void)y0_q4;
(void)y_step_q4;
-
- convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
- w, h);
+ convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w,
+ h);
}
void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                int h) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- (void)filter_y;
+ (void)y0_q4;
(void)y_step_q4;
-
- convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
- x_step_q4, w, h);
+ convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
+ w, h);
}
void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                          int h) {- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- (void)filter_x;
+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                          int y0_q4, int y_step_q4, int w, int h) {+ (void)x0_q4;
(void)x_step_q4;
-
- convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4, y_step_q4,
- w, h);
+ convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w,
+ h);
}
void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                               int h) {- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- (void)filter_x;
+ (void)x0_q4;
(void)x_step_q4;
-
- convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
- y_step_q4, w, h);
+ convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4,
+ w, h);
}
void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
-                     int w, int h) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
- filters_y, y0_q4, y_step_q4, w, h);
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
+                     int h) {+ convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4,
+ y_step_q4, w, h);
}
void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                          int w, int h) {// Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]);
@@ -227,21 +203,22 @@
assert(w <= 64);
assert(h <= 64);
- vpx_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4, filter_y,
+ vpx_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4,
y_step_q4, w, h);
- vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h);
+ vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h);
}
void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int filter_x_stride, const int16_t *filter_y,
-                         int filter_y_stride, int w, int h) {+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
+                         int w, int h) {int r;
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
   for (r = h; r > 0; --r) {memcpy(dst, src, w);
@@ -251,15 +228,16 @@
}
void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int filter_x_stride, const int16_t *filter_y,
-                        int filter_y_stride, int w, int h) {+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
+                        int w, int h) {int x, y;
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
   for (y = 0; y < h; ++y) {for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1);
@@ -269,53 +247,52 @@
}
void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                         int w, int h) {- vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                        int w, int h) {- vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
-                     int w, int h) {- vpx_convolve8_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w,
+                     int h) {+ vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
+ y0_q4, y_step_q4, w, h);
}
void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h) {- vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                           int h) {- vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter_x,
- x_step_q4, filter_y, y_step_q4, w, h);
+ const InterpKernel *filter, int x0_q4, int x_step_q4,
+                           int y0_q4, int y_step_q4, int w, int h) {+ vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                          int w, int h) {- vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h);
+ vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4,
+ x_step_q4, y0_q4, y_step_q4, w, h);
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -417,9 +394,9 @@
static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const InterpKernel *const x_filters, int x0_q4,
- int x_step_q4, const InterpKernel *const y_filters,
-                            int y0_q4, int y_step_q4, int w, int h, int bd) {+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+                            int h, int bd) {// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
@@ -442,90 +419,73 @@
assert(x_step_q4 <= 32);
highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride,
- temp, 64, x_filters, x0_q4, x_step_q4, w,
+ temp, 64, filter, x0_q4, x_step_q4, w,
intermediate_height, bd);
highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride,
- y_filters, y0_q4, y_step_q4, w, h, bd);
+ filter, y0_q4, y_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
-                                  int h, int bd) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- (void)filter_y;
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
+                                  int w, int h, int bd) {+ (void)y0_q4;
(void)y_step_q4;
- highbd_convolve_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
+ highbd_convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                       int w, int h, int bd) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- (void)filter_y;
+ (void)y0_q4;
(void)y_step_q4;
- highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filters_x, x0_q4,
+ highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
x_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                  int h, int bd) {- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- (void)filter_x;
+ (void)x0_q4;
(void)x_step_q4;
- highbd_convolve_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
+ highbd_convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
y_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                      int w, int h, int bd) {- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- (void)filter_x;
+ (void)x0_q4;
(void)x_step_q4;
- highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filters_y, y0_q4,
+ highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4,
y_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                             int h, int bd) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- highbd_convolve(src, src_stride, dst, dst_stride, filters_x, x0_q4, x_step_q4,
- filters_y, y0_q4, y_step_q4, w, h, bd);
+ highbd_convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
+ y0_q4, y_step_q4, w, h, bd);
}
void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
                                 int h, int bd) {// Fixed size intermediate buffer places limits on parameters.
DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]);
@@ -532,23 +492,24 @@
assert(w <= 64);
assert(h <= 64);
- vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter_x, x_step_q4,
- filter_y, y_step_q4, w, h, bd);
- vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, NULL, 0, w, h,
+ vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4,
+ y0_q4, y_step_q4, w, h, bd);
+ vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h,
bd);
}
void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
-                                int w, int h, int bd) {+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+                                int h, int bd) {int r;
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
(void)bd;
   for (r = h; r > 0; --r) {@@ -560,15 +521,16 @@
void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
-                               int w, int h, int bd) {+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+                               int h, int bd) {int x, y;
- (void)filter_x;
- (void)filter_x_stride;
- (void)filter_y;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
(void)bd;
   for (y = 0; y < h; ++y) {--- a/vpx_dsp/vpx_convolve.h
+++ b/vpx_dsp/vpx_convolve.h
@@ -19,15 +19,15 @@
typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4, int w,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
int h);
#if CONFIG_VP9_HIGHBITDEPTH
typedef void (*highbd_convolve_fn_t)(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int x_step_q4,
- const int16_t *filter_y, int y_step_q4,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
int w, int h, int bd);
#endif
--- a/vpx_dsp/vpx_dsp_rtcd_defs.pl
+++ b/vpx_dsp/vpx_dsp_rtcd_defs.pl
@@ -6,6 +6,7 @@
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
+#include "vpx_dsp/vpx_filter.h"
EOF
}
@@ -331,69 +332,69 @@
#
# Sub Pixel Filters
#
-add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve_copy/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve_copy neon dspr2 msa sse2 vsx/;
-add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve_avg neon dspr2 msa sse2 vsx/;
-add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8 sse2 ssse3 avx2 neon dspr2 msa vsx/;
-add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_horiz sse2 ssse3 avx2 neon dspr2 msa vsx/;
-add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_vert sse2 ssse3 avx2 neon dspr2 msa vsx/;
-add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_avg/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg sse2 ssse3 neon dspr2 msa vsx/;
-add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg_horiz sse2 ssse3 neon dspr2 msa vsx/;
-add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_convolve8_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_convolve8_avg_vert sse2 ssse3 neon dspr2 msa vsx/;
-add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
specialize qw/vpx_scaled_2d ssse3/;
-add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_avg_2d/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_avg_horiz/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
-add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h";
+add_proto qw/void vpx_scaled_avg_vert/, "const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h";
 if (vpx_config("CONFIG_VP9_HIGHBITDEPTH") eq "yes") {#
# Sub Pixel Filters
#
- add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve_copy/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve_copy sse2 avx2 neon/;
- add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve_avg sse2 avx2 neon/;
- add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve8/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8 avx2 neon/, "$sse2_x86_64";
- add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve8_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8_horiz avx2 neon/, "$sse2_x86_64";
- add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve8_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8_vert avx2 neon/, "$sse2_x86_64";
- add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve8_avg/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8_avg avx2 neon/, "$sse2_x86_64";
- add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve8_avg_horiz/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8_avg_horiz avx2 neon/, "$sse2_x86_64";
- add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, const int16_t *filter_y, int y_step_q4, int w, int h, int bps";
+ add_proto qw/void vpx_highbd_convolve8_avg_vert/, "const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bps";
specialize qw/vpx_highbd_convolve8_avg_vert avx2 neon/, "$sse2_x86_64";
} # CONFIG_VP9_HIGHBITDEPTH
--- a/vpx_dsp/vpx_filter.h
+++ b/vpx_dsp/vpx_filter.h
@@ -26,17 +26,6 @@
typedef int16_t InterpKernel[SUBPEL_TAPS];
-static INLINE const InterpKernel *get_filter_base(const int16_t *filter) {- // NOTE: This assumes that the filter table is 256-byte aligned.
- // TODO(agrange) Modify to make independent of table alignment.
- return (const InterpKernel *)(((intptr_t)filter) & ~((intptr_t)0xFF));
-}
-
-static INLINE int get_filter_offset(const int16_t *f,
-                                    const InterpKernel *base) {- return (int)((const InterpKernel *)(intptr_t)f - base);
-}
-
#ifdef __cplusplus
} // extern "C"
#endif
--- a/vpx_dsp/x86/convolve.h
+++ b/vpx_dsp/x86/convolve.h
@@ -20,14 +20,15 @@
uint8_t *output_ptr, ptrdiff_t out_pitch,
uint32_t output_height, const int16_t *filter);
-#define FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
+#define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \
void vpx_convolve8_##name##_##opt( \
const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
-      const int16_t *filter_y, int y_step_q4, int w, int h) {                \- (void)filter_x; \
+ ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \
+      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {               \+ const int16_t *filter = filter_kernel[offset]; \
+ (void)x0_q4; \
(void)x_step_q4; \
- (void)filter_y; \
+ (void)y0_q4; \
(void)y_step_q4; \
assert(filter[3] != 128); \
assert(step_q4 == 16); \
@@ -64,32 +65,36 @@
} \
}
-#define FUN_CONV_2D(avg, opt) \
- void vpx_convolve8_##avg##opt( \
- const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
-      const int16_t *filter_y, int y_step_q4, int w, int h) {                 \- assert(filter_x[3] != 128); \
- assert(filter_y[3] != 128); \
- assert(w <= 64); \
- assert(h <= 64); \
- assert(x_step_q4 == 16); \
- assert(y_step_q4 == 16); \
-    if (filter_x[0] | filter_x[1] | filter_x[2]) {                            \- DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
- vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
- filter_x, x_step_q4, filter_y, y_step_q4, w, \
- h + 7); \
- vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, \
- y_step_q4, w, h); \
-    } else {                                                                  \- DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
- vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter_x, \
- x_step_q4, filter_y, y_step_q4, w, h + 1); \
- vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter_x, \
- x_step_q4, filter_y, y_step_q4, w, h); \
- } \
+#define FUN_CONV_2D(avg, opt) \
+ void vpx_convolve8_##avg##opt( \
+ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \
+ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
+      int x_step_q4, int y0_q4, int y_step_q4, int w, int h) {                 \+ const int16_t *filter_x = filter[x0_q4]; \
+ const int16_t *filter_y = filter[y0_q4]; \
+ (void)filter_y; \
+ assert(filter_x[3] != 128); \
+ assert(filter_y[3] != 128); \
+ assert(w <= 64); \
+ assert(h <= 64); \
+ assert(x_step_q4 == 16); \
+ assert(y_step_q4 == 16); \
+    if (filter_x[0] | filter_x[1] | filter_x[2]) {                             \+ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71]); \
+ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \
+ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
+ h + 7); \
+ vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \
+ filter, x0_q4, x_step_q4, y0_q4, \
+ y_step_q4, w, h); \
+    } else {                                                                   \+ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65]); \
+ vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \
+ x_step_q4, y0_q4, y_step_q4, w, h + 1); \
+ vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, \
+ h); \
+ } \
}
#if CONFIG_VP9_HIGHBITDEPTH
@@ -101,95 +106,97 @@
unsigned int output_height,
const int16_t *filter, int bd);
-#define HIGH_FUN_CONV_1D(name, step_q4, filter, dir, src_start, avg, opt) \
- void vpx_highbd_convolve8_##name##_##opt( \
- const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
-      const int16_t *filter_y, int y_step_q4, int w, int h, int bd) {     \-    if (step_q4 == 16 && filter[3] != 128) {                              \-      if (filter[0] | filter[1] | filter[2]) {                            \-        while (w >= 16) {                                                 \- vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
- src_start, src_stride, dst, dst_stride, h, filter, bd); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
-        while (w >= 8) {                                                  \- vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \
- src_start, src_stride, dst, dst_stride, h, filter, bd); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
-        while (w >= 4) {                                                  \- vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \
- src_start, src_stride, dst, dst_stride, h, filter, bd); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
-      } else {                                                            \-        while (w >= 16) {                                                 \- vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \
- src, src_stride, dst, dst_stride, h, filter, bd); \
- src += 16; \
- dst += 16; \
- w -= 16; \
- } \
-        while (w >= 8) {                                                  \- vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \
- src, src_stride, dst, dst_stride, h, filter, bd); \
- src += 8; \
- dst += 8; \
- w -= 8; \
- } \
-        while (w >= 4) {                                                  \- vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \
- src, src_stride, dst, dst_stride, h, filter, bd); \
- src += 4; \
- dst += 4; \
- w -= 4; \
- } \
- } \
- } \
-    if (w) {                                                              \- vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, \
- y_step_q4, w, h, bd); \
- } \
- }
-
-#define HIGH_FUN_CONV_2D(avg, opt) \
- void vpx_highbd_convolve8_##avg##opt( \
+#define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt) \
+ void vpx_highbd_convolve8_##name##_##opt( \
const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
- ptrdiff_t dst_stride, const int16_t *filter_x, int x_step_q4, \
-      const int16_t *filter_y, int y_step_q4, int w, int h, int bd) {         \- assert(w <= 64); \
- assert(h <= 64); \
-    if (x_step_q4 == 16 && y_step_q4 == 16) {                                 \-      if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) {  \- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
- vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
- fdata2, 64, filter_x, x_step_q4, \
- filter_y, y_step_q4, w, h + 7, bd); \
- vpx_highbd_convolve8_##avg##vert_##opt( \
- fdata2 + 192, 64, dst, dst_stride, filter_x, x_step_q4, filter_y, \
- y_step_q4, w, h, bd); \
+ ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \
+      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {        \+ const int16_t *filter = filter_kernel[offset]; \
+    if (step_q4 == 16 && filter[3] != 128) {                                  \+      if (filter[0] | filter[1] | filter[2]) {                                \+        while (w >= 16) {                                                     \+ vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \
+ src_start, src_stride, dst, dst_stride, h, filter, bd); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+        while (w >= 8) {                                                      \+ vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \
+ src_start, src_stride, dst, dst_stride, h, filter, bd); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+        while (w >= 4) {                                                      \+ vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \
+ src_start, src_stride, dst, dst_stride, h, filter, bd); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
       } else {                                                                \- DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
- vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, \
- filter_x, x_step_q4, filter_y, \
- y_step_q4, w, h + 1, bd); \
- vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, \
- y_step_q4, w, h, bd); \
+        while (w >= 16) {                                                     \+ vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \
+ src, src_stride, dst, dst_stride, h, filter, bd); \
+ src += 16; \
+ dst += 16; \
+ w -= 16; \
+ } \
+        while (w >= 8) {                                                      \+ vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \
+ src, src_stride, dst, dst_stride, h, filter, bd); \
+ src += 8; \
+ dst += 8; \
+ w -= 8; \
+ } \
+        while (w >= 4) {                                                      \+ vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \
+ src, src_stride, dst, dst_stride, h, filter, bd); \
+ src += 4; \
+ dst += 4; \
+ w -= 4; \
+ } \
} \
-    } else {                                                                  \- vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, \
- filter_x, x_step_q4, filter_y, y_step_q4, \
- w, h, bd); \
} \
+    if (w) {                                                                  \+ vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \
+ filter_kernel, x0_q4, x_step_q4, y0_q4, \
+ y_step_q4, w, h, bd); \
+ } \
+ }
+
+#define HIGH_FUN_CONV_2D(avg, opt) \
+ void vpx_highbd_convolve8_##avg##opt( \
+ const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \
+ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \
+      int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) {         \+ const int16_t *filter_x = filter[x0_q4]; \
+ assert(w <= 64); \
+ assert(h <= 64); \
+    if (x_step_q4 == 16 && y_step_q4 == 16) {                                  \+      if ((filter_x[0] | filter_x[1] | filter_x[2]) || filter_x[3] == 128) {   \+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71]); \
+ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \
+ fdata2, 64, filter, x0_q4, x_step_q4, \
+ y0_q4, y_step_q4, w, h + 7, bd); \
+ vpx_highbd_convolve8_##avg##vert_##opt( \
+ fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \
+ y0_q4, y_step_q4, w, h, bd); \
+      } else {                                                                 \+ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65]); \
+ vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \
+ x0_q4, x_step_q4, y0_q4, y_step_q4, \
+ w, h + 1, bd); \
+ vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \
+ filter, x0_q4, x_step_q4, \
+ y0_q4, y_step_q4, w, h, bd); \
+ } \
+    } else {                                                                   \+ vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter, \
+ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, \
+ bd); \
+ } \
}
#endif // CONFIG_VP9_HIGHBITDEPTH
--- a/vpx_dsp/x86/highbd_convolve_avx2.c
+++ b/vpx_dsp/x86/highbd_convolve_avx2.c
@@ -18,13 +18,14 @@
void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                    int width, int h, int bd) {- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
(void)bd;
assert(width % 4 == 0);
@@ -99,13 +100,14 @@
void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride,
uint16_t *dst, ptrdiff_t dst_stride,
- const int16_t *filter_x, int filter_x_stride,
- const int16_t *filter_y, int filter_y_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
                                   int width, int h, int bd) {- (void)filter_x;
- (void)filter_y;
- (void)filter_x_stride;
- (void)filter_y_stride;
+ (void)filter;
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
(void)bd;
assert(width % 4 == 0);
@@ -1073,8 +1075,8 @@
#define vpx_highbd_filter_block1d4_v8_avx2 vpx_highbd_filter_block1d4_v8_sse2
#define vpx_highbd_filter_block1d4_v2_avx2 vpx_highbd_filter_block1d4_v2_sse2
-HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
-HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
+HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2);
+HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , avx2);
HIGH_FUN_CONV_2D(, avx2);
void vpx_highbd_filter_block1d4_h8_avg_sse2(const uint16_t *, ptrdiff_t,
@@ -1098,8 +1100,8 @@
#define vpx_highbd_filter_block1d4_v2_avg_avx2 \
vpx_highbd_filter_block1d4_v2_avg_sse2
-HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, avx2);
-HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
+HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2);
+HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_,
avx2);
HIGH_FUN_CONV_2D(avg_, avx2);
--- a/vpx_dsp/x86/vpx_asm_stubs.c
+++ b/vpx_dsp/x86/vpx_asm_stubs.c
@@ -41,38 +41,38 @@
// void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
// void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
// void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4,
+// int y_step_q4, int w, int h);
// void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
-FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_, sse2);
+FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
+FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
+FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
+FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, sse2);
// void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
// void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
FUN_CONV_2D(, sse2);
FUN_CONV_2D(avg_, sse2);
@@ -140,22 +140,22 @@
// const int16_t *filter_y,
// int y_step_q4,
// int w, int h, int bd);
-HIGH_FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , sse2);
-HIGH_FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , sse2);
-HIGH_FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, sse2);
-HIGH_FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
+HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2);
+HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , sse2);
+HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2);
+HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_,
sse2);
// void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h, int bd);
// void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h, int bd);
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4,
+// int y_step_q4, int w, int h, int bd);
HIGH_FUN_CONV_2D(, sse2);
HIGH_FUN_CONV_2D(avg_, sse2);
#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
--- a/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
+++ b/vpx_dsp/x86/vpx_convolve_copy_sse2.asm
@@ -20,14 +20,14 @@
%endif
%ifidn %2, highbd
%define pavg pavgw
-cglobal %2_convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
+cglobal %2_convolve_%1, 4, 8, 4+AUX_XMM_REGS, src, src_stride, \
dst, dst_stride, \
- fx, fxs, fy, fys, w, h, bd
+ f, fxo, fxs, fyo, fys, w, h, bd
%else
%define pavg pavgb
-cglobal convolve_%1, 4, 7, 4+AUX_XMM_REGS, src, src_stride, \
+cglobal convolve_%1, 4, 8, 4+AUX_XMM_REGS, src, src_stride, \
dst, dst_stride, \
- fx, fxs, fy, fys, w, h
+ f, fxo, fxs, fyo, fys, w, h
%endif
mov r4d, dword wm
%ifidn %2, highbd
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
@@ -554,21 +554,21 @@
#define vpx_filter_block1d4_h2_avx2 vpx_filter_block1d4_h2_ssse3
// void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
// void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , avx2);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , avx2);
+FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2);
+FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , avx2);
// void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
FUN_CONV_2D(, avx2);
#endif // HAVE_AX2 && HAVE_SSSE3
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -306,29 +306,28 @@
// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4,
+// int y_step_q4, int w, int h);
// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
-// int w, int h);
-FUN_CONV_1D(horiz, x_step_q4, filter_x, h, src, , ssse3);
-FUN_CONV_1D(vert, y_step_q4, filter_y, v, src - src_stride * 3, , ssse3);
-FUN_CONV_1D(avg_horiz, x_step_q4, filter_x, h, src, avg_, ssse3);
-FUN_CONV_1D(avg_vert, y_step_q4, filter_y, v, src - src_stride * 3, avg_,
- ssse3);
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4,
+// int y_step_q4, int w, int h);
+FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , ssse3);
+FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * 3, , ssse3);
+FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, ssse3);
+FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * 3, avg_, ssse3);
#define TRANSPOSE_8X8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \
out2, out3, out4, out5, out6, out7) \
@@ -813,9 +812,9 @@
static void scaledconvolve2d(const uint8_t *src, ptrdiff_t src_stride,
uint8_t *dst, ptrdiff_t dst_stride,
- const InterpKernel *const x_filters, int x0_q4,
- int x_step_q4, const InterpKernel *const y_filters,
-                             int y0_q4, int y_step_q4, int w, int h) {+ const InterpKernel *const filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+                             int h) {// Note: Fixed size intermediate buffer, temp, places limits on parameters.
// 2d filtering proceeds in 2 steps:
// (1) Interpolate horizontally into an intermediate buffer, temp.
@@ -840,49 +839,43 @@
   if (w >= 8) {scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),
- src_stride, temp, 64, x_filters, x0_q4, x_step_q4,
- w, intermediate_height);
+ src_stride, temp, 64, filter, x0_q4, x_step_q4, w,
+ intermediate_height);
   } else {scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1),
- src_stride, temp, 64, x_filters, x0_q4, x_step_q4,
- w, intermediate_height);
+ src_stride, temp, 64, filter, x0_q4, x_step_q4, w,
+ intermediate_height);
}
   if (w >= 16) {scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
- dst_stride, y_filters, y0_q4, y_step_q4, w, h);
+ dst_stride, filter, y0_q4, y_step_q4, w, h);
   } else if (w == 8) {scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
- dst_stride, y_filters, y0_q4, y_step_q4, w, h);
+ dst_stride, filter, y0_q4, y_step_q4, w, h);
   } else {scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst,
- dst_stride, y_filters, y0_q4, y_step_q4, w, h);
+ dst_stride, filter, y0_q4, y_step_q4, w, h);
}
}
void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
- ptrdiff_t dst_stride, const int16_t *filter_x,
- int x_step_q4, const int16_t *filter_y, int y_step_q4,
+ ptrdiff_t dst_stride, const InterpKernel *filter,
+ int x0_q4, int x_step_q4, int y0_q4, int y_step_q4,
                          int w, int h) {- const InterpKernel *const filters_x = get_filter_base(filter_x);
- const int x0_q4 = get_filter_offset(filter_x, filters_x);
-
- const InterpKernel *const filters_y = get_filter_base(filter_y);
- const int y0_q4 = get_filter_offset(filter_y, filters_y);
-
- scaledconvolve2d(src, src_stride, dst, dst_stride, filters_x, x0_q4,
- x_step_q4, filters_y, y0_q4, y_step_q4, w, h);
+ scaledconvolve2d(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4,
+ y0_q4, y_step_q4, w, h);
}
// void vp9_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
// void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride,
// uint8_t *dst, ptrdiff_t dst_stride,
-// const int16_t *filter_x, int x_step_q4,
-// const int16_t *filter_y, int y_step_q4,
+// const InterpKernel *filter, int x0_q4,
+// int32_t x_step_q4, int y0_q4, int y_step_q4,
// int w, int h);
FUN_CONV_2D(, ssse3);
FUN_CONV_2D(avg_, ssse3);
--
⑨