shithub: libvpx

--- a/vp9/encoder/vp9_encoder.c

+++ b/vp9/encoder/vp9_encoder.c

@@ -2377,6 +2377,19 @@

 #endif

     // TODO(jingning): Reduce the actual memory use for tpl model build up.

     for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {

+#if CONFIG_NON_GREEDY_MV

+      int sqr_bsize;

+      int rf_idx;

+      for (rf_idx = 0; rf_idx < 3; ++rf_idx) {

+        for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {

+          CHECK_MEM_ERROR(

+              cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],

+              vpx_calloc(mi_rows * mi_cols,

+                         sizeof(*cpi->tpl_stats[frame]

+                                     .pyramid_mv_arr[rf_idx][sqr_bsize])));

+        }

+      }

+#endif

       CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,

                       vpx_calloc(mi_rows * mi_cols,

                                  sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));

@@ -5974,7 +5987,8 @@

 #if CONFIG_NON_GREEDY_MV

     (void)td;

-    mv.as_int = tpl_stats->mv_arr[rf_idx].as_int;

+    mv.as_int =

+        get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_int;

 #else

     motion_compensated_prediction(

         cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,

@@ -6076,6 +6090,7 @@

   set_mv_limits(cm, x, mi_row, mi_col);

   for (rf_idx = 0; rf_idx < 3; ++rf_idx) {

+    int_mv *mv = get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col);

     if (ref_frame[rf_idx] == NULL) {

       tpl_stats->ready[rf_idx] = 0;

       continue;

@@ -6085,8 +6100,8 @@

     motion_compensated_prediction(

         cpi, td, frame_idx, xd->cur_buf->y_buffer + mb_y_offset,

         ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize,

-        mi_row, mi_col, &tpl_stats->mv_arr[rf_idx].as_mv, rf_idx,

-        &tpl_stats->mv_dist[rf_idx], &tpl_stats->mv_cost[rf_idx]);

+        mi_row, mi_col, &mv->as_mv, rf_idx, &tpl_stats->mv_dist[rf_idx],

+        &tpl_stats->mv_cost[rf_idx]);

@@ -6328,12 +6343,14 @@

             &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];

         for (rf_idx = 0; rf_idx < 3; ++rf_idx) {

 #if RE_COMPUTE_MV_INCONSISTENCY

+          MV this_mv =

+              get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row, mi_col)->as_mv;

           MV full_mv;

           int_mv nb_full_mvs[NB_MVS_NUM];

           vp9_prepare_nb_full_mvs(tpl_frame, mi_row, mi_col, rf_idx, bsize,

                                   nb_full_mvs);

-          full_mv.row = this_tpl_stats->mv_arr[rf_idx].as_mv.row >> 3;

-          full_mv.col = this_tpl_stats->mv_arr[rf_idx].as_mv.col >> 3;

+          full_mv.row = this_mv.row >> 3;

+          full_mv.col = this_mv.col >> 3;

           this_tpl_stats->mv_cost[rf_idx] =

               vp9_nb_mvs_inconsistency(&full_mv, nb_full_mvs, NB_MVS_NUM);

 #endif  // RE_COMPUTE_MV_INCONSISTENCY

@@ -6387,7 +6404,7 @@

         if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) {

           const TplDepStats *tpl_ptr =

               &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col];

-          int_mv mv = tpl_ptr->mv_arr[idx];

+          int_mv mv = *get_pyramid_mv(tpl_frame, idx, bsize, mi_row, mi_col);

           printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, mv.as_mv.col);

--- a/vp9/encoder/vp9_encoder.h

+++ b/vp9/encoder/vp9_encoder.h

@@ -297,11 +297,14 @@

   int64_t inter_cost_arr[3];

   int64_t recon_error_arr[3];

   int64_t sse_arr[3];

-  int_mv mv_arr[3];

   double feature_score;

 #endif

 } TplDepStats;

+#if CONFIG_NON_GREEDY_MV

+#define SQUARE_BLOCK_SIZES 4  // 4x4, 8x8, 16x16 and 32x32

+#endif

 typedef struct TplDepFrame {

   uint8_t is_valid;

   TplDepStats *tpl_stats_ptr;

@@ -315,8 +318,35 @@

   double lambda;

   double mv_dist_sum[3];

   double mv_cost_sum[3];

+  int_mv *pyramid_mv_arr[3][SQUARE_BLOCK_SIZES];

 #endif

 } TplDepFrame;

+#if CONFIG_NON_GREEDY_MV

+// Maps a square block size to its index in the second dimension of

+// TplDepFrame::pyramid_mv_arr: BLOCK_4X4 -> 0, BLOCK_8X8 -> 1,

+// BLOCK_16X16 -> 2, BLOCK_32X32 -> 3.

+// Any other bsize is a caller error: it trips the assert below and, when

+// asserts are compiled out (NDEBUG), returns -1, which is not a valid index.

+static INLINE int get_square_block_idx(BLOCK_SIZE bsize) {

+  if (bsize == BLOCK_4X4) {

+    return 0;

+  }

+  if (bsize == BLOCK_8X8) {

+    return 1;

+  }

+  if (bsize == BLOCK_16X16) {

+    return 2;

+  }

+  if (bsize == BLOCK_32X32) {

+    return 3;

+  }

+  // The diagnostic is carried by the assert expression rather than a printf,

+  // so this helper never writes to stdout.

+  assert(0 && "ERROR: non-square block size");

+  return -1;

+}

+// Returns a pointer to the motion vector stored for reference rf_idx at

+// (mi_row, mi_col) in the per-block-size array selected by bsize.

+// Entries are addressed as mi_row * tpl_frame->stride + mi_col.

+static INLINE int_mv *get_pyramid_mv(const TplDepFrame *tpl_frame, int rf_idx,

+                                     BLOCK_SIZE bsize, int mi_row, int mi_col) {

+  const int level = get_square_block_idx(bsize);

+  int_mv *const level_mvs = tpl_frame->pyramid_mv_arr[rf_idx][level];

+  return level_mvs + (mi_row * tpl_frame->stride + mi_col);

+}

+#endif

 #define TPL_DEP_COST_SCALE_LOG2 4

--- a/vp9/encoder/vp9_mcomp.c

+++ b/vp9/encoder/vp9_mcomp.c

@@ -1895,8 +1895,10 @@

       const TplDepStats *tpl_ptr =

           &tpl_frame

                ->tpl_stats_ptr[(mi_row + r) * tpl_frame->stride + mi_col + c];

+      int_mv *mv =

+          get_pyramid_mv(tpl_frame, rf_idx, bsize, mi_row + r, mi_col + c);

       if (tpl_ptr->ready[rf_idx]) {

-        nb_full_mvs[i].as_mv = get_full_mv(&tpl_ptr->mv_arr[rf_idx].as_mv);

+        nb_full_mvs[i].as_mv = get_full_mv(&mv->as_mv);

       } else {

         nb_full_mvs[i].as_int = INVALID_MV;