ref: 22d67c346fde9630b9accfdd3b3abe9840a061fe
parent: 88030f7d6616db6a764b9d1165245b83947fb769
parent: 6127de7a7dc05c64277cdf6b531d5588c4842d6e
author: Angie Chiang <angiebird@google.com>
date: Wed Oct 17 19:10:50 EDT 2018
Merge changes I6d5c77af,I6bf504b4,Ie5dc5ea7,Ie6024b1a,If45fba8a, ... * changes: Add do_motion_search Preserve code of doing mv search in raster order Variant implementation of changing mv search order Add feature_score_loc_sort Init mv_[dist/cost]_sum in init_tpl_stats Change mv search order according to feature_score
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2361,6 +2361,17 @@
if (cpi->sf.enable_tpl_model) {
const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+#if CONFIG_NON_GREEDY_MV
+ CHECK_MEM_ERROR(
+ cm, cpi->feature_score_loc_arr,
+ vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
+ CHECK_MEM_ERROR(
+ cm, cpi->feature_score_loc_sort,
+ vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
+ CHECK_MEM_ERROR(
+ cm, cpi->feature_score_loc_heap,
+ vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
+#endif
// TODO(jingning): Reduce the actual memory use for tpl model build up.
for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
@@ -2577,6 +2588,11 @@
vp9_denoiser_free(&(cpi->denoiser));
#endif
+#if CONFIG_NON_GREEDY_MV
+ vpx_free(cpi->feature_score_loc_arr);
+ vpx_free(cpi->feature_score_loc_sort);
+ vpx_free(cpi->feature_score_loc_heap);
+#endif
for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
cpi->tpl_stats[frame].is_valid = 0;
@@ -5484,6 +5500,13 @@
int frame_idx;
for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+#if CONFIG_NON_GREEDY_MV
+ int rf_idx;
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ tpl_frame->mv_dist_sum[rf_idx] = 0;
+ tpl_frame->mv_cost_sum[rf_idx] = 0;
+ }
+#endif
memset(tpl_frame->tpl_stats_ptr, 0,
tpl_frame->height * tpl_frame->width *
sizeof(*tpl_frame->tpl_stats_ptr));
@@ -5947,6 +5970,131 @@
tpl_stats->mv.as_int = best_mv.as_int;
}
+#if CONFIG_NON_GREEDY_MV
+static int compare_feature_score(const void *a, const void *b) {
+ const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
+ const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
+ if (aa->feature_score < bb->feature_score) {
+ return 1;
+ } else if (aa->feature_score > bb->feature_score) {
+ return -1;
+ } else {
+ return 0;
+ }
+}
+
+static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
+ YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize,
+ int mi_row, int mi_col) {
+ VP9_COMMON *cm = &cpi->common;
+ MACROBLOCK *x = &td->mb;
+ MACROBLOCKD *xd = &x->e_mbd;
+ TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+ TplDepStats *tpl_stats =
+ &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ const int mb_y_offset =
+ mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+ int rf_idx;
+
+ set_mv_limits(cm, x, mi_row, mi_col);
+
+ for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+ if (ref_frame[rf_idx] == NULL) {
+ tpl_stats->ready[rf_idx] = 0;
+ continue;
+ } else {
+ tpl_stats->ready[rf_idx] = 1;
+ }
+ motion_compensated_prediction(
+ cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
+ ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
+ mi_row, mi_col, tpl_stats, rf_idx);
+ }
+}
+
+#define CHANGE_MV_SEARCH_ORDER 1
+#define USE_PQSORT 1
+
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size,
+ FEATURE_SCORE_LOC **output) {
+ if (*size > 0) {
+ *output = heap[0];
+ --*size;
+ if (*size > 0) {
+ int p, l, r;
+ heap[0] = heap[*size];
+ p = 0;
+ l = 2 * p + 1;
+ r = 2 * p + 2;
+ while (l < *size) {
+ FEATURE_SCORE_LOC *tmp;
+ int c = l;
+ if (r < *size && heap[r]->feature_score > heap[l]->feature_score) {
+ c = r;
+ }
+ if (heap[p]->feature_score >= heap[c]->feature_score) {
+ break;
+ }
+ tmp = heap[p];
+ heap[p] = heap[c];
+ heap[c] = tmp;
+ p = c;
+ l = 2 * p + 1;
+ r = 2 * p + 2;
+ }
+ }
+ } else {
+ assert(0);
+ }
+}
+
+static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size,
+ FEATURE_SCORE_LOC *input) {
+ int c, p;
+ FEATURE_SCORE_LOC *tmp;
+ heap[*size] = input;
+ ++*size;
+ c = *size - 1;
+ p = c >> 1;
+ while (c > 0) {
+ if (heap[c]->feature_score > heap[p]->feature_score) {
+ tmp = heap[p];
+ heap[p] = heap[c];
+ heap[c] = tmp;
+ c = p;
+ p = c >> 1;
+ } else {
+ break;
+ }
+ }
+}
+
+static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
+ BLOCK_SIZE bsize, int mi_row, int mi_col,
+ int *heap_size) {
+ const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
+ const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+ int i;
+ for (i = 0; i < NB_MVS_NUM; ++i) {
+ int r = dirs[i][0] * mi_unit;
+ int c = dirs[i][1] * mi_unit;
+ if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
+ mi_col + c < tpl_frame->mi_cols) {
+ FEATURE_SCORE_LOC *fs_loc =
+ &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride +
+ (mi_col + c)];
+ if (fs_loc->visited == 0) {
+ max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc);
+ }
+ }
+ }
+}
+#endif // USE_PQSORT
+#endif // CHANGE_MV_SEARCH_ORDER
+#endif // CONFIG_NON_GREEDY_MV
+
void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
BLOCK_SIZE bsize) {
TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
@@ -5979,7 +6127,15 @@
int64_t recon_error, sse;
#if CONFIG_NON_GREEDY_MV
int rf_idx;
-#endif
+ int fs_loc_sort_size;
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+ int fs_loc_heap_size;
+#else
+ int i;
+#endif // USE_PQSORT
+#endif // CHANGE_MV_SEARCH_ORDER
+#endif // CONFIG_NON_GREEDY_MV
// Setup scaling factor
#if CONFIG_VP9_HIGHBITDEPTH
@@ -6023,6 +6179,7 @@
#if CONFIG_NON_GREEDY_MV
tpl_frame->lambda = 250;
+ fs_loc_sort_size = 0;
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
@@ -6032,41 +6189,55 @@
const int bh = 4 << b_height_log2_lookup[bsize];
TplDepStats *tpl_stats =
&tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ FEATURE_SCORE_LOC *fs_loc =
+ &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
tpl_stats->feature_score = get_feature_score(
xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
+ fs_loc->visited = 0;
+ fs_loc->feature_score = tpl_stats->feature_score;
+ fs_loc->mi_row = mi_row;
+ fs_loc->mi_col = mi_col;
+ cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
+ ++fs_loc_sort_size;
}
}
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- tpl_frame->mv_dist_sum[rf_idx] = 0;
- tpl_frame->mv_cost_sum[rf_idx] = 0;
- }
+ qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
+ sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
+
+#if CHANGE_MV_SEARCH_ORDER
+#if !USE_PQSORT
+ for (i = 0; i < fs_loc_sort_size; ++i) {
+ FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+ fs_loc->mi_col);
+ }
+#else // !USE_PQSORT
+ fs_loc_heap_size = 0;
+ max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
+ cpi->feature_score_loc_sort[0]);
- for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
- for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
- const int mb_y_offset =
- mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
- TplDepStats *tpl_stats =
- &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+ while (fs_loc_heap_size > 0) {
+ FEATURE_SCORE_LOC *fs_loc;
+ max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
- set_mv_limits(cm, x, mi_row, mi_col);
+ fs_loc->visited = 1;
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- if (ref_frame[rf_idx] == NULL) {
- tpl_stats->ready[rf_idx] = 0;
- continue;
- } else {
- tpl_stats->ready[rf_idx] = 1;
- }
- motion_compensated_prediction(
- cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
- ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride,
- bsize, mi_row, mi_col, tpl_stats, rf_idx);
- }
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+ fs_loc->mi_col);
+
+ add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
+ &fs_loc_heap_size);
+ }
+#endif // !USE_PQSORT
+#else // CHANGE_MV_SEARCH_ORDER
+ for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+ for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+ do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
}
}
-
-#endif
+#endif // CHANGE_MV_SEARCH_ORDER
+#endif // CONFIG_NON_GREEDY_MV
for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -503,6 +503,14 @@
// Maximum operating frame buffer size needed for a GOP using ARF reference.
#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
+#if CONFIG_NON_GREEDY_MV
+typedef struct FEATURE_SCORE_LOC {
+ int visited;
+ double feature_score;
+ int mi_row;
+ int mi_col;
+} FEATURE_SCORE_LOC;
+#endif
typedef struct VP9_COMP {
QUANTS quants;
@@ -530,6 +538,11 @@
TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE];
YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
EncFrameBuf enc_frame_buf[REF_FRAMES];
+#if CONFIG_NON_GREEDY_MV
+ FEATURE_SCORE_LOC *feature_score_loc_arr;
+ FEATURE_SCORE_LOC **feature_score_loc_sort;
+ FEATURE_SCORE_LOC **feature_score_loc_heap;
+#endif
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.