ref: 4bb1cc06fdce4cf3a8298dd4120bb26451bc46e0
parent: 0087f5c74f7ae6cc8899c8c1535d5ec18e5a5f66
parent: fc165fbe00f546dd47755528eb790f7ec23ebb61
author: Angie Chiang <angiebird@google.com>
date: Wed Jan 9 11:11:04 EST 2019
Merge changes Icec98e6f,I63614e65,I25ea05f4
* changes:
  - Add full_pixel_exhaustive_new
  - Add sse cost in vp9_full_pixel_diamond_new
  - Use motion field for mv inconsistency in mv search
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1732,6 +1732,9 @@
return best_sad;
}
+#define MIN_RANGE 7  // Smallest first-pass mesh range accepted by full_pixel_exhaustive_new().
+#define MAX_RANGE 256  // Largest accepted mesh range; also caps the range after it is widened around the start MV.
+#define MIN_INTERVAL 1  // Smallest accepted mesh interval; refinement passes only run when the first interval exceeds it.
#if CONFIG_NON_GREEDY_MV
double vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_mvs,
int mv_num) {
@@ -1757,6 +1760,152 @@
return best_cost;
}
+// Mesh search around center_mv that scores every candidate as
+//   SAD + lambda * vp9_nb_mvs_inconsistency(candidate, nb_full_mvs, full_mv_num).
+//
+// The centre is first clamped to x->mv_limits, and the [-range, range] window
+// is clipped so that every candidate stays inside those limits. Rows advance
+// by `step`. Columns advance by `step` when step > 1; when step == 1 every
+// column is visited, four at a time through sdx4df where possible.
+//
+// best_mv receives the lowest-cost full-pel MV found (the clamped centre if
+// nothing beats it) and the matching cost is returned. best_mv may alias
+// center_mv: the centre is copied before best_mv is written.
+static double exhuastive_mesh_search_new(const MACROBLOCK *x, MV *best_mv,
+                                         int range, int step,
+                                         const vp9_variance_fn_ptr_t *fn_ptr,
+                                         const MV *center_mv, double lambda,
+                                         const int_mv *nb_full_mvs,
+                                         int full_mv_num) {
+  const MACROBLOCKD *const xd = &x->e_mbd;
+  const struct buf_2d *const what = &x->plane[0].src;
+  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
+  // Take a private copy of the centre: callers pass the same MV as best_mv.
+  MV fcenter_mv = { center_mv->row, center_mv->col };
+  double best_sad;
+  int r, c, i;
+  int start_col, end_col, start_row, end_row;
+  // With step == 1 the column loop consumes four positions per iteration.
+  const int col_step = (step > 1) ? step : 4;
+
+  assert(step >= 1);
+
+  clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
+           x->mv_limits.row_min, x->mv_limits.row_max);
+  *best_mv = fcenter_mv;
+  best_sad =
+      fn_ptr->sdf(what->buf, what->stride,
+                  get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
+      lambda * vp9_nb_mvs_inconsistency(&fcenter_mv, nb_full_mvs, full_mv_num);
+
+  // Offsets relative to the clamped centre, clipped to the legal MV window.
+  start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
+  start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
+  end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
+  end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);
+
+  // Throughout, the inconsistency term is only evaluated for candidates whose
+  // raw SAD already beats the current best cost.
+  // NOTE(review): this shortcut assumes the inconsistency term is never
+  // negative - confirm against vp9_nb_mvs_inconsistency().
+  for (r = start_row; r <= end_row; r += step) {
+    for (c = start_col; c <= end_col; c += col_step) {
+      if (step > 1) {
+        // Sparse pass: one candidate per (row, column) grid point.
+        const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c };
+        double sad =
+            fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
+                        in_what->stride);
+        if (sad < best_sad) {
+          sad +=
+              lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+          if (sad < best_sad) {
+            best_sad = sad;
+            *best_mv = mv;
+          }
+        }
+      } else if (c + 3 <= end_col) {
+        // Dense pass: four adjacent columns in a single sdx4df call.
+        unsigned int sads[4];
+        const uint8_t *addrs[4];
+        for (i = 0; i < 4; ++i) {
+          const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
+          addrs[i] = get_buf_from_mv(in_what, &mv);
+        }
+        fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
+
+        for (i = 0; i < 4; ++i) {
+          if (sads[i] < best_sad) {
+            const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
+            const double sad =
+                sads[i] + lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs,
+                                                            full_mv_num);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
+            }
+          }
+        }
+      } else {
+        // Dense pass, tail of the row: fewer than four columns remain.
+        // The bound is inclusive so that column end_col itself is tested;
+        // `i < end_col - c` silently dropped the last column of the window.
+        for (i = 0; i <= end_col - c; ++i) {
+          const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
+          double sad =
+              fn_ptr->sdf(what->buf, what->stride,
+                          get_buf_from_mv(in_what, &mv), in_what->stride);
+          if (sad < best_sad) {
+            sad += lambda *
+                   vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num);
+            if (sad < best_sad) {
+              best_sad = sad;
+              *best_mv = mv;
+            }
+          }
+        }
+      }
+    }
+  }
+
+  return best_sad;
+}
+
+// Mesh-pattern full-pel search driven by cpi->sf.mesh_patterns. Candidates
+// are ranked by exhuastive_mesh_search_new(), i.e. SAD plus the
+// lambda-weighted neighbour-MV inconsistency.
+//
+// centre_mv_full is the starting full-pel MV and the chosen MV is written to
+// dst_mv. The two may point at the same MV because the start is copied first.
+// The return value is vp9_get_mvpred_var() evaluated at the chosen MV, not
+// the lambda-weighted mesh cost.
+//
+// If mesh_patterns[0] holds an illegal range/interval, the error is reported.
+// In builds where assert() is compiled out, the mesh passes are then skipped
+// and the starting MV is kept, rather than dividing by a zero interval.
+static double full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
+                                        MV *centre_mv_full,
+                                        const vp9_variance_fn_ptr_t *fn_ptr,
+                                        MV *dst_mv, double lambda,
+                                        const int_mv *nb_full_mvs,
+                                        int full_mv_num) {
+  const SPEED_FEATURES *const sf = &cpi->sf;
+  const MV dummy_mv = { 0, 0 };
+  MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
+  int interval = sf->mesh_patterns[0].interval;
+  int range = sf->mesh_patterns[0].range;
+  double bestsme;
+
+  // Trap illegal values for interval and range for this function.
+  if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
+      (interval > range)) {
+    printf("ERROR: invalid range\n");
+    assert(0);
+  } else {
+    // 1 <= interval <= range holds here, so the divisor is at least 1.
+    const int baseline_interval_divisor = range / interval;
+
+    // Check size of proposed first range against magnitude of the centre
+    // value used as a starting point, keeping the range/interval ratio.
+    range =
+        VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
+    range = VPXMIN(range, MAX_RANGE);
+    interval = VPXMAX(interval, range / baseline_interval_divisor);
+
+    // Initial search. Only the updated temp_mv is needed; the returned mesh
+    // cost is superseded by vp9_get_mvpred_var() below.
+    exhuastive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv,
+                               lambda, nb_full_mvs, full_mv_num);
+
+    if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) {
+      int i;
+      // Progressive searches re-centred on the best MV so far, using the
+      // remaining mesh patterns, until a pattern with a step size of 1 has
+      // been run.
+      for (i = 1; i < MAX_MESH_STEP; ++i) {
+        exhuastive_mesh_search_new(x, &temp_mv, sf->mesh_patterns[i].range,
+                                   sf->mesh_patterns[i].interval, fn_ptr,
+                                   &temp_mv, lambda, nb_full_mvs, full_mv_num);
+
+        if (sf->mesh_patterns[i].interval == 1) break;
+      }
+    }
+  }
+
+  bestsme = vp9_get_mvpred_var(x, &temp_mv, &dummy_mv, fn_ptr, 0);
+  *dst_mv = temp_mv;
+
+  return bestsme;
+}
+
double vp9_diamond_search_sad_new(const MACROBLOCK *x,
const search_site_config *cfg,
const MV *init_full_mv, MV *best_full_mv,
@@ -2279,11 +2428,14 @@
double thissme;
double bestsme;
const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param;
+ const MV center_mv = { 0, 0 };
vpx_clear_system_state();
bestsme = vp9_diamond_search_sad_new(
x, &cpi->ss_cfg, mvp_full, best_mv, best_mv_dist, best_mv_cost,
step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num);
+ bestsme = vp9_get_mvpred_var(x, best_mv, &center_mv, fn_ptr, 0);
+
// If there won't be more n-step search, check to see if refining search is
// needed.
if (n > further_steps) do_refine = 0;
@@ -2299,6 +2451,7 @@
thissme = vp9_diamond_search_sad_new(
x, &cpi->ss_cfg, mvp_full, &temp_mv, &mv_dist, &mv_cost,
step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num);
+ thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
// check to see if refining search is needed.
if (num00 > further_steps - n) do_refine = 0;
@@ -2320,6 +2473,7 @@
thissme = vp9_refining_search_sad_new(x, &temp_mv, &mv_dist, &mv_cost,
lambda, search_range, fn_ptr,
nb_full_mvs, full_mv_num);
+ thissme = vp9_get_mvpred_var(x, &temp_mv, &center_mv, fn_ptr, 0);
if (thissme < bestsme) {
bestsme = thissme;
*best_mv = temp_mv;
@@ -2327,6 +2481,9 @@
*best_mv_cost = mv_cost;
}
}
+
+ bestsme = full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda,
+ nb_full_mvs, full_mv_num);
return bestsme;
}
#endif // CONFIG_NON_GREEDY_MV
@@ -2395,9 +2552,6 @@
return bestsme;
}
-#define MIN_RANGE 7
-#define MAX_RANGE 256
-#define MIN_INTERVAL 1
// Runs a limited-range exhaustive mesh search using a pattern set
// according to the encode speed profile.
static int full_pixel_exhaustive(VP9_COMP *cpi, MACROBLOCK *x,
--- a/vp9/encoder/vp9_mcomp.h
+++ b/vp9/encoder/vp9_mcomp.h
@@ -148,6 +148,31 @@
void vp9_prepare_nb_full_mvs(const struct TplDepFrame *tpl_frame, int mi_row,
int mi_col, int rf_idx, BLOCK_SIZE bsize,
int_mv *nb_full_mvs);
+
+// Maps a block size onto a square block size:
+//   4x4, 4x8, 8x4            -> BLOCK_4X4
+//   8x8, 8x16, 16x8          -> BLOCK_8X8
+//   16x16, 16x32, 32x16      -> BLOCK_16X16
+//   32x32 up to 64x64        -> BLOCK_32X32
+// Any other value is reported, trips an assert, and yields BLOCK_INVALID.
+static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) {
+  switch (bsize) {
+    case BLOCK_4X4:
+    case BLOCK_4X8:
+    case BLOCK_8X4: return BLOCK_4X4;
+
+    case BLOCK_8X8:
+    case BLOCK_8X16:
+    case BLOCK_16X8: return BLOCK_8X8;
+
+    case BLOCK_16X16:
+    case BLOCK_16X32:
+    case BLOCK_32X16: return BLOCK_16X16;
+
+    case BLOCK_32X32:
+    case BLOCK_32X64:
+    case BLOCK_64X32:
+    case BLOCK_64X64: return BLOCK_32X32;
+
+    default:
+      printf("ERROR: invlid block size %d\n", bsize);
+      assert(0);
+      return BLOCK_INVALID;
+  }
+}
#endif // CONFIG_NON_GREEDY_MV
#ifdef __cplusplus
} // extern "C"
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -2316,59 +2316,20 @@
}
#if CONFIG_NON_GREEDY_MV
-#define MAX_PREV_NB_FULL_MV_NUM 8
-static int find_prev_nb_full_mvs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
- int ref_frame, BLOCK_SIZE bsize, int mi_row,
- int mi_col, int_mv *nb_full_mvs) {
- int i;
- const TileInfo *tile = &xd->tile;
- int full_mv_num = 0;
- assert(bsize >= BLOCK_8X8);
- for (i = 0; i < MVREF_NEIGHBOURS; ++i) {
- const POSITION *mv_ref = &mv_ref_blocks[bsize][i];
- if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
- const MODE_INFO *nb_mi =
- xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
- if (nb_mi->sb_type >= BLOCK_8X8) {
- if (nb_mi->ref_frame[0] == ref_frame) {
- nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[0].as_mv);
- ++full_mv_num;
- if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
- return full_mv_num;
- }
- } else if (nb_mi->ref_frame[1] == ref_frame) {
- nb_full_mvs[full_mv_num].as_mv = get_full_mv(&nb_mi->mv[1].as_mv);
- ++full_mv_num;
- if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
- return full_mv_num;
- }
- }
- } else {
- int j;
- for (j = 0; j < 4; ++j) {
- // TODO(angiebird): avoid using duplicated mvs
- if (nb_mi->ref_frame[0] == ref_frame) {
- nb_full_mvs[full_mv_num].as_mv =
- get_full_mv(&nb_mi->bmi[j].as_mv[0].as_mv);
- ++full_mv_num;
- if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
- return full_mv_num;
- }
- } else if (nb_mi->ref_frame[1] == ref_frame) {
- nb_full_mvs[full_mv_num].as_mv =
- get_full_mv(&nb_mi->bmi[j].as_mv[1].as_mv);
- ++full_mv_num;
- if (full_mv_num >= MAX_PREV_NB_FULL_MV_NUM) {
- return full_mv_num;
- }
- }
- }
- }
- }
+static int ref_frame_to_gf_rf_idx(int ref_frame) {  // Map a reference frame id to the 0..2 index that single_motion_search passes as rf_idx to vp9_prepare_nb_full_mvs().
+ if (ref_frame == GOLDEN_FRAME) {
+ return 0;  // GOLDEN_FRAME -> 0
 }
- return full_mv_num;
+ if (ref_frame == LAST_FRAME) {
+ return 1;  // LAST_FRAME -> 1
+ }
+ if (ref_frame == ALTREF_FRAME) {
+ return 2;  // ALTREF_FRAME -> 2
+ }
+ assert(0);  // Any other reference frame is a caller error.
+ return -1;  // Reached only when assert() is compiled out.
 }
-#endif // CONFIG_NON_GREEDY_MV
+#endif  // CONFIG_NON_GREEDY_MV
static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
int mi_row, int mi_col, int_mv *tmp_mv,
@@ -2395,10 +2356,13 @@
double mv_cost = 0;
double lambda = (pw * ph) / 4;
double bestsme;
- int_mv nb_full_mvs[MAX_PREV_NB_FULL_MV_NUM];
-
- const int nb_full_mv_num =
- find_prev_nb_full_mvs(cm, xd, ref, bsize, mi_row, mi_col, nb_full_mvs);
+ int_mv nb_full_mvs[NB_MVS_NUM];
+ const int nb_full_mv_num = NB_MVS_NUM;
+ int gf_group_idx = cpi->twopass.gf_group.index;
+ int gf_rf_idx = ref_frame_to_gf_rf_idx(ref);
+ BLOCK_SIZE square_bsize = get_square_block_size(bsize);
+ vp9_prepare_nb_full_mvs(&cpi->tpl_stats[gf_group_idx], mi_row, mi_col,
+ gf_rf_idx, square_bsize, nb_full_mvs);
#else // CONFIG_NON_GREEDY_MV
int bestsme = INT_MAX;
int sadpb = x->sadperbit16;