shithub: libvpx

Download patch

ref: 22d67c346fde9630b9accfdd3b3abe9840a061fe
parent: 88030f7d6616db6a764b9d1165245b83947fb769
parent: 6127de7a7dc05c64277cdf6b531d5588c4842d6e
author: Angie Chiang <angiebird@google.com>
date: Wed Oct 17 19:10:50 EDT 2018

Merge changes I6d5c77af,I6bf504b4,Ie5dc5ea7,Ie6024b1a,If45fba8a, ...

* changes:
  Add do_motion_search
  Preserve code of doing mv search in raster order
  Variant implementation of changing mv search order
  Add feature_score_loc_sort
  Init mv_[dist/cost]_sum in init_tpl_stats
  Change mv search order according to feature_score

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2361,6 +2361,17 @@
   if (cpi->sf.enable_tpl_model) {
     const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
     const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+#if CONFIG_NON_GREEDY_MV
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_arr,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_sort,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_heap,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
+#endif
     // TODO(jingning): Reduce the actual memory use for tpl model build up.
     for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
       CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
@@ -2577,6 +2588,11 @@
   vp9_denoiser_free(&(cpi->denoiser));
 #endif
 
+#if CONFIG_NON_GREEDY_MV
+  vpx_free(cpi->feature_score_loc_arr);
+  vpx_free(cpi->feature_score_loc_sort);
+  vpx_free(cpi->feature_score_loc_heap);
+#endif
   for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
     vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
     cpi->tpl_stats[frame].is_valid = 0;
@@ -5484,6 +5500,13 @@
   int frame_idx;
   for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
     TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+#if CONFIG_NON_GREEDY_MV
+    int rf_idx;
+    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+      tpl_frame->mv_dist_sum[rf_idx] = 0;
+      tpl_frame->mv_cost_sum[rf_idx] = 0;
+    }
+#endif
     memset(tpl_frame->tpl_stats_ptr, 0,
            tpl_frame->height * tpl_frame->width *
                sizeof(*tpl_frame->tpl_stats_ptr));
@@ -5947,6 +5970,131 @@
   tpl_stats->mv.as_int = best_mv.as_int;
 }
 
+#if CONFIG_NON_GREEDY_MV
+static int compare_feature_score(const void *a, const void *b) {
+  const FEATURE_SCORE_LOC *aa = *(FEATURE_SCORE_LOC *const *)a;
+  const FEATURE_SCORE_LOC *bb = *(FEATURE_SCORE_LOC *const *)b;
+  if (aa->feature_score < bb->feature_score) {
+    return 1;
+  } else if (aa->feature_score > bb->feature_score) {
+    return -1;
+  } else {
+    return 0;
+  }
+}
+
+static void do_motion_search(VP9_COMP *cpi, ThreadData *td, int frame_idx,
+                             YV12_BUFFER_CONFIG **ref_frame, BLOCK_SIZE bsize,
+                             int mi_row, int mi_col) {
+  VP9_COMMON *cm = &cpi->common;
+  MACROBLOCK *x = &td->mb;
+  MACROBLOCKD *xd = &x->e_mbd;
+  TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
+  TplDepStats *tpl_stats =
+      &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+  const int mb_y_offset =
+      mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
+  int rf_idx;
+
+  set_mv_limits(cm, x, mi_row, mi_col);
+
+  for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+    if (ref_frame[rf_idx] == NULL) {
+      tpl_stats->ready[rf_idx] = 0;
+      continue;
+    } else {
+      tpl_stats->ready[rf_idx] = 1;
+    }
+    motion_compensated_prediction(
+        cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
+        ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,
+        mi_row, mi_col, tpl_stats, rf_idx);
+  }
+}
+
+#define CHANGE_MV_SEARCH_ORDER 1
+#define USE_PQSORT 1
+
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+static void max_heap_pop(FEATURE_SCORE_LOC **heap, int *size,
+                         FEATURE_SCORE_LOC **output) {
+  if (*size > 0) {
+    *output = heap[0];
+    --*size;
+    if (*size > 0) {
+      int p, l, r;
+      heap[0] = heap[*size];
+      p = 0;
+      l = 2 * p + 1;
+      r = 2 * p + 2;
+      while (l < *size) {
+        FEATURE_SCORE_LOC *tmp;
+        int c = l;
+        if (r < *size && heap[r]->feature_score > heap[l]->feature_score) {
+          c = r;
+        }
+        if (heap[p]->feature_score >= heap[c]->feature_score) {
+          break;
+        }
+        tmp = heap[p];
+        heap[p] = heap[c];
+        heap[c] = tmp;
+        p = c;
+        l = 2 * p + 1;
+        r = 2 * p + 2;
+      }
+    }
+  } else {
+    assert(0);
+  }
+}
+
+static void max_heap_push(FEATURE_SCORE_LOC **heap, int *size,
+                          FEATURE_SCORE_LOC *input) {
+  int c, p;
+  FEATURE_SCORE_LOC *tmp;
+  heap[*size] = input;
+  ++*size;
+  c = *size - 1;
+  p = c >> 1;
+  while (c > 0) {
+    if (heap[c]->feature_score > heap[p]->feature_score) {
+      tmp = heap[p];
+      heap[p] = heap[c];
+      heap[c] = tmp;
+      c = p;
+      p = c >> 1;
+    } else {
+      break;
+    }
+  }
+}
+
+static void add_nb_blocks_to_heap(VP9_COMP *cpi, const TplDepFrame *tpl_frame,
+                                  BLOCK_SIZE bsize, int mi_row, int mi_col,
+                                  int *heap_size) {
+  const int mi_unit = num_8x8_blocks_wide_lookup[bsize];
+  const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } };
+  int i;
+  for (i = 0; i < NB_MVS_NUM; ++i) {
+    int r = dirs[i][0] * mi_unit;
+    int c = dirs[i][1] * mi_unit;
+    if (mi_row + r >= 0 && mi_row + r < tpl_frame->mi_rows && mi_col + c >= 0 &&
+        mi_col + c < tpl_frame->mi_cols) {
+      FEATURE_SCORE_LOC *fs_loc =
+          &cpi->feature_score_loc_arr[(mi_row + r) * tpl_frame->stride +
+                                      (mi_col + c)];
+      if (fs_loc->visited == 0) {
+        max_heap_push(cpi->feature_score_loc_heap, heap_size, fs_loc);
+      }
+    }
+  }
+}
+#endif  // USE_PQSORT
+#endif  // CHANGE_MV_SEARCH_ORDER
+#endif  // CONFIG_NON_GREEDY_MV
+
 void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx,
                        BLOCK_SIZE bsize) {
   TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
@@ -5979,7 +6127,15 @@
   int64_t recon_error, sse;
 #if CONFIG_NON_GREEDY_MV
   int rf_idx;
-#endif
+  int fs_loc_sort_size;
+#if CHANGE_MV_SEARCH_ORDER
+#if USE_PQSORT
+  int fs_loc_heap_size;
+#else
+  int i;
+#endif  // USE_PQSORT
+#endif  // CHANGE_MV_SEARCH_ORDER
+#endif  // CONFIG_NON_GREEDY_MV
 
   // Setup scaling factor
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -6023,6 +6179,7 @@
 
 #if CONFIG_NON_GREEDY_MV
   tpl_frame->lambda = 250;
+  fs_loc_sort_size = 0;
 
   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
@@ -6032,41 +6189,55 @@
       const int bh = 4 << b_height_log2_lookup[bsize];
       TplDepStats *tpl_stats =
           &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+      FEATURE_SCORE_LOC *fs_loc =
+          &cpi->feature_score_loc_arr[mi_row * tpl_frame->stride + mi_col];
       tpl_stats->feature_score = get_feature_score(
           xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bw, bh);
+      fs_loc->visited = 0;
+      fs_loc->feature_score = tpl_stats->feature_score;
+      fs_loc->mi_row = mi_row;
+      fs_loc->mi_col = mi_col;
+      cpi->feature_score_loc_sort[fs_loc_sort_size] = fs_loc;
+      ++fs_loc_sort_size;
     }
   }
 
-  for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
-    tpl_frame->mv_dist_sum[rf_idx] = 0;
-    tpl_frame->mv_cost_sum[rf_idx] = 0;
-  }
+  qsort(cpi->feature_score_loc_sort, fs_loc_sort_size,
+        sizeof(*cpi->feature_score_loc_sort), compare_feature_score);
+
+#if CHANGE_MV_SEARCH_ORDER
+#if !USE_PQSORT
+  for (i = 0; i < fs_loc_sort_size; ++i) {
+    FEATURE_SCORE_LOC *fs_loc = cpi->feature_score_loc_sort[i];
+    do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+                     fs_loc->mi_col);
+  }
+#else   // !USE_PQSORT
+  fs_loc_heap_size = 0;
+  max_heap_push(cpi->feature_score_loc_heap, &fs_loc_heap_size,
+                cpi->feature_score_loc_sort[0]);
 
-  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
-    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
-      const int mb_y_offset =
-          mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE;
-      TplDepStats *tpl_stats =
-          &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];
+  while (fs_loc_heap_size > 0) {
+    FEATURE_SCORE_LOC *fs_loc;
+    max_heap_pop(cpi->feature_score_loc_heap, &fs_loc_heap_size, &fs_loc);
 
-      set_mv_limits(cm, x, mi_row, mi_col);
+    fs_loc->visited = 1;
 
-      for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
-        if (ref_frame[rf_idx] == NULL) {
-          tpl_stats->ready[rf_idx] = 0;
-          continue;
-        } else {
-          tpl_stats->ready[rf_idx] = 1;
-        }
-        motion_compensated_prediction(
-            cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,
-            ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride,
-            bsize, mi_row, mi_col, tpl_stats, rf_idx);
-      }
+    do_motion_search(cpi, td, frame_idx, ref_frame, bsize, fs_loc->mi_row,
+                     fs_loc->mi_col);
+
+    add_nb_blocks_to_heap(cpi, tpl_frame, bsize, fs_loc->mi_row, fs_loc->mi_col,
+                          &fs_loc_heap_size);
+  }
+#endif  // !USE_PQSORT
+#else   // CHANGE_MV_SEARCH_ORDER
+  for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
+    for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
+      do_motion_search(cpi, td, frame_idx, ref_frame, bsize, mi_row, mi_col);
     }
   }
-
-#endif
+#endif  // CHANGE_MV_SEARCH_ORDER
+#endif  // CONFIG_NON_GREEDY_MV
   for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) {
     for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) {
       mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame,
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -503,6 +503,14 @@
 
 // Maximum operating frame buffer size needed for a GOP using ARF reference.
 #define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS)
+#if CONFIG_NON_GREEDY_MV
+typedef struct FEATURE_SCORE_LOC {
+  int visited;
+  double feature_score;
+  int mi_row;
+  int mi_col;
+} FEATURE_SCORE_LOC;
+#endif
 
 typedef struct VP9_COMP {
   QUANTS quants;
@@ -530,6 +538,11 @@
   TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE];
   YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
   EncFrameBuf enc_frame_buf[REF_FRAMES];
+#if CONFIG_NON_GREEDY_MV
+  FEATURE_SCORE_LOC *feature_score_loc_arr;
+  FEATURE_SCORE_LOC **feature_score_loc_sort;
+  FEATURE_SCORE_LOC **feature_score_loc_heap;
+#endif
 
   TileDataEnc *tile_data;
   int allocated_tiles;  // Keep track of memory allocated for tiles.