ref: ef41c6286d1151dd6eeabea4e9160364f5aeee8e
parent: 71b38a144ebc50cb8bb043366ae959267acf4206
author: Linfeng Zhang <linfengz@google.com>
date: Wed Sep 6 08:01:07 EDT 2017
Update convolve functions' assertions so that 4 to 1 frame scaling can call them. Change-Id: I9ec438aa63b923ba164ad3c59d7ecfa12789eab5
--- a/test/vp9_scale_test.cc
+++ b/test/vp9_scale_test.cc
@@ -49,10 +49,10 @@
void RunTest() {
static const int kNumSizesToTest = 4;
- static const int kNumScaleFactorsToTest = 2;
+ static const int kNumScaleFactorsToTest = 4;
static const int kWidthsToTest[] = { 16, 32, 48, 64 };
static const int kHeightsToTest[] = { 16, 20, 24, 28 };
- static const int kScaleFactors[] = { 1, 2 };
+ static const int kScaleFactors[] = { 1, 2, 3, 4 };
for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
for (int h = 0; h < kNumSizesToTest; ++h) {
@@ -132,8 +132,8 @@
TEST_P(ScaleTest, DISABLED_Speed) {
static const int kCountSpeedTestBlock = 100;
- static const int kNumScaleFactorsToTest = 2;
- static const int kScaleFactors[] = { 1, 2 };
+ static const int kNumScaleFactorsToTest = 4;
+ static const int kScaleFactors[] = { 1, 2, 3, 4 };
const int src_height = 1280;
const int src_width = 720;
for (INTERP_FILTER filter_type = 2; filter_type < 4; ++filter_type) {
--- a/vpx_dsp/vpx_convolve.c
+++ b/vpx_dsp/vpx_convolve.c
@@ -129,6 +129,9 @@
// --Must round-up because block may be located at sub-pixel position.
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
+ // When called from the frame scaling function, the smallest scaling factor is
+ // x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is
+ // still big enough.
uint8_t temp[64 * 135];
const int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
@@ -135,8 +138,8 @@
assert(w <= 64);
assert(h <= 64);
- assert(y_step_q4 <= 32);
- assert(x_step_q4 <= 32);
+ assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));
+ assert(x_step_q4 <= 64);
convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64,
filter, x0_q4, x_step_q4, w, intermediate_height);
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -828,6 +828,9 @@
// --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails.
// --((64 - 1) * 32 + 15) >> 4 + 8 = 135.
// --Require an additional 8 rows for the horiz_w8 transpose tail.
+ // When called from the frame scaling function, the smallest scaling factor is
+ // x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is
+ // still big enough.
DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]);
const int intermediate_height =
(((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS;
@@ -834,8 +837,8 @@
assert(w <= 64);
assert(h <= 64);
- assert(y_step_q4 <= 32);
- assert(x_step_q4 <= 32);
+ assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32));
+ assert(x_step_q4 <= 64);
if (w >= 8) {
scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1),