ref: 1f11352c071da81c2f65ae7673415910f7110755
parent: 1d13a6c321b3af595adb59f175894ebb9c36c1d6
author: sdeng <sdeng@google.com>
date: Tue Jun 4 09:54:18 EDT 2019
Fix a bug in best RD cost updating This CL fixed a bug that sometimes we calculate the best rd cost using uninitialized rd_div. This CL also includes a small refactoring of rd_pick_partition(). Speed change: (the smaller the better) Performance counter stats for './vpxenc park_joy_480p.y4m --limit=50 -o output.webm': with this CL: 297,086,181,136 instructions:u without this CL: 299,285,835,104 instructions:u Quality change: (negative is better) avg_psnr ovr_psnr ssim (low_res) 0.007 0.005 -0.002 (mid_res) 0.022 0.028 0.007 (hd_res) -0.008 -0.003 -0.014 Change-Id: I8924d8426364304212bcef3aba13346783e6f1a8
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -3807,11 +3807,11 @@
// TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
// unlikely to be selected depending on previous rate-distortion optimization
// results, for encoding speed-up.
-static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
- TileDataEnc *tile_data, TOKENEXTRA **tp,
- int mi_row, int mi_col, BLOCK_SIZE bsize,
- RD_COST *rd_cost, RD_COST best_rdc,
- PC_TREE *pc_tree) {
+static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
+ TileDataEnc *tile_data, TOKENEXTRA **tp,
+ int mi_row, int mi_col, BLOCK_SIZE bsize,
+ RD_COST *rd_cost, RD_COST best_rdc,
+ PC_TREE *pc_tree) {
VP9_COMMON *const cm = &cpi->common;
const VP9EncoderConfig *const oxcf = &cpi->oxcf;
TileInfo *const tile_info = &tile_data->tile_info;
@@ -3859,12 +3859,7 @@
// RD search. It may be used to prune ref frame selection of rect partitions.
uint8_t ref_frames_used[4] = { 0, 0, 0, 0 };
- const int rd_div = x->rddiv;
int partition_mul = x->cb_rdmult;
- if (oxcf->tuning == VP8_TUNE_SSIM) {
- set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul);
- }
- vp9_rd_cost_update(partition_mul, rd_div, &best_rdc);
(void)*tp_orig;
@@ -3881,6 +3876,11 @@
set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
+ if (oxcf->tuning == VP8_TUNE_SSIM) {
+ set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul);
+ }
+ vp9_rd_cost_update(partition_mul, x->rddiv, &best_rdc);
+
if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ &&
cpi->oxcf.aq_mode != LOOKAHEAD_AQ)
x->mb_energy = vp9_block_energy(cpi, x, bsize);
@@ -4023,7 +4023,7 @@
}
if (bsize >= BLOCK_8X8) {
this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
- vp9_rd_cost_update(partition_mul, rd_div, &this_rdc);
+ vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc);
}
if (this_rdc.rdcost < best_rdc.rdcost) {
@@ -4165,24 +4165,24 @@
++i) {
const int x_idx = (i & 1) * mi_step;
const int y_idx = (i >> 1) * mi_step;
+ int found_best_rd = 0;
RD_COST best_rdc_split;
- if (best_rdc.rate == INT_MAX || best_rdc.dist == INT64_MAX) {
- best_rdc_split.rate = INT_MAX;
- best_rdc_split.dist = INT64_MAX;
- } else {
+ vp9_rd_cost_reset(&best_rdc_split);
+
+ if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
+ // A must split test here increases the number of sub
+ // partitions but hurts metrics results quite a bit,
+ // so this extra test is commented out pending
+ // further tests on whether it adds much in terms of
+ // visual quality.
+ // (must_split) ? best_rdc.rate
+ // : best_rdc.rate - sum_rdc.rate,
+ // (must_split) ? best_rdc.dist
+ // : best_rdc.dist - sum_rdc.dist,
best_rdc_split.rate = best_rdc.rate - sum_rdc.rate;
best_rdc_split.dist = best_rdc.dist - sum_rdc.dist;
}
- // A must split test here increases the number of sub
- // partitions but hurts metrics results quite a bit,
- // so this extra test is commented out pending
- // further tests on whether it adds much in terms of
- // visual quality.
- // (must_split) ? best_rdc.rdcost
- // : best_rdc.rdcost - sum_rdc.rdcost,
- best_rdc_split.rdcost = best_rdc.rdcost - sum_rdc.rdcost;
-
if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
continue;
@@ -4189,13 +4189,11 @@
pc_tree->split[i]->index = i;
if (cpi->sf.prune_ref_frame_for_rect_partitions)
pc_tree->split[i]->none.rate = INT_MAX;
- rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx,
- mi_col + x_idx, subsize, &this_rdc, best_rdc_split,
- pc_tree->split[i]);
+ found_best_rd = rd_pick_partition(
+ cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
+ &this_rdc, best_rdc_split, pc_tree->split[i]);
- if (this_rdc.dist == INT64_MAX || this_rdc.rate == INT_MAX ||
- (this_rdc.dist == best_rdc_split.dist &&
- this_rdc.rate == best_rdc_split.rate)) {
+ if (found_best_rd == 0) {
sum_rdc.rdcost = INT64_MAX;
break;
} else {
@@ -4208,7 +4206,7 @@
}
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
- vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc);
+ vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
}
}
}
@@ -4215,7 +4213,7 @@
if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) {
sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
- vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc);
+ vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
if ((sum_rdc.rdcost < best_rdc.rdcost) ||
(must_split && (sum_rdc.dist < best_rdc.dist))) {
@@ -4283,7 +4281,7 @@
best_rdc.dist);
if (sum_rdc.rdcost < INT64_MAX) {
sum_rdc.rate += part_mode_rate;
- vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc);
+ vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
}
if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
@@ -4303,7 +4301,7 @@
} else {
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
- vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc);
+ vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
}
}
@@ -4333,7 +4331,7 @@
best_rdc.dist);
if (sum_rdc.rdcost < INT64_MAX) {
sum_rdc.rate += part_mode_rate;
- vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc);
+ vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
}
if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
@@ -4353,7 +4351,7 @@
} else {
sum_rdc.rate += this_rdc.rate;
sum_rdc.dist += this_rdc.dist;
- vp9_rd_cost_update(partition_mul, rd_div, &sum_rdc);
+ vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc);
}
}
@@ -4380,6 +4378,8 @@
} else {
assert(tp_orig == *tp);
}
+
+ return should_encode_sb;
}
static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td,
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -58,6 +58,8 @@
}
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
+ assert(mult >= 0);
+ assert(div > 0);
if (rate >= 0 && dist >= 0) {
return RDCOST(mult, div, rate, dist);
}
@@ -69,6 +71,7 @@
}
return -RDCOST(mult, div, -rate, -dist);
}
+
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
if (rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX) {
rd_cost->rdcost =