shithub: libvpx

Download patch

ref: 1bb29e24553bc03c8cb2b3bb11daff8c109cc462
parent: 24b16ce7c9da42b23484174a41d1c5ff2ea4929a
parent: 3df55cebb25d18f75f7711f84d9786ac2ce33b72
author: Jingning Han <jingning@google.com>
date: Mon Jul 2 13:01:45 EDT 2018

Merge "Exploit the spatial variance in temporal dependency model"

--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -92,6 +92,7 @@
   int sadperbit4;
   int rddiv;
   int rdmult;
+  int cb_rdmult;
   int mb_energy;
 
   // These are set to their default values at the beginning, and then adjusted
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -1960,6 +1960,8 @@
       x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
   }
 
+  if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
+
   // Find best coding mode & reconstruct the MB so it is available
   // as a predictor for MBs that follow in the SB
   if (frame_is_intra_only(cm)) {
@@ -1986,8 +1988,6 @@
     vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
   }
 
-  x->rdmult = orig_rdmult;
-
   // TODO(jingning) The rate-distortion optimization flow needs to be
   // refactored to provide proper exit/return handle.
   if (rd_cost->rate == INT_MAX)
@@ -1995,6 +1995,8 @@
   else
     rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist);
 
+  x->rdmult = orig_rdmult;
+
   ctx->rate = rd_cost->rate;
   ctx->dist = rd_cost->dist;
 }
@@ -2120,6 +2122,7 @@
                      PICK_MODE_CONTEXT *ctx) {
   MACROBLOCK *const x = &td->mb;
   set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
+  if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult;
   update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
   encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
 
@@ -3611,6 +3614,47 @@
 #undef Q_CTX
 #undef RESOLUTION_CTX
 
+int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col,
+                     int orig_rdmult) {
+  TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+  TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+  int tpl_stride = tpl_frame->stride;
+  int64_t intra_cost = 0;
+  int64_t mc_dep_cost = 0;
+  int mi_wide = num_8x8_blocks_wide_lookup[bsize];
+  int mi_high = num_8x8_blocks_high_lookup[bsize];
+  int row, col;
+
+  int dr = 0;
+  int count = 0;
+  double r0, rk, beta;
+
+  r0 = cpi->rd.r0;
+
+  for (row = mi_row; row < mi_row + mi_high; ++row) {
+    for (col = mi_col; col < mi_col + mi_wide; ++col) {
+      TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+
+      if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue;
+
+      intra_cost += this_stats->intra_cost;
+      mc_dep_cost += this_stats->mc_dep_cost;
+
+      ++count;
+    }
+  }
+
+  rk = (double)intra_cost / (intra_cost + mc_dep_cost);
+  beta = r0 / rk;
+  dr = vp9_get_adaptive_rdmult(cpi, beta);
+
+  dr = VPXMIN(dr, orig_rdmult * 5 / 4);
+  dr = VPXMAX(dr, orig_rdmult * 3 / 4);
+
+  dr = VPXMAX(1, dr);
+  return dr;
+}
+
 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
 // unlikely to be selected depending on previous rate-distortion optimization
 // results, for encoding speed-up.
@@ -3660,7 +3704,7 @@
   int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate;
   int must_split = 0;
 
-  int partition_mul = cpi->rd.RDMULT;
+  int partition_mul = cpi->sf.enable_tpl_model ? x->cb_rdmult : cpi->rd.RDMULT;
 
   (void)*tp_orig;
 
@@ -4176,6 +4220,14 @@
       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
                        &dummy_rate, &dummy_dist, 1, td->pc_root);
     } else {
+      int orig_rdmult = cpi->rd.RDMULT;
+      x->cb_rdmult = orig_rdmult;
+      if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) {
+        int dr =
+            get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult);
+        x->cb_rdmult = dr;
+      }
+
       // If required set upper and lower partition size limits
       if (sf->auto_min_max_partition_size) {
         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
@@ -5386,6 +5438,24 @@
 
     if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
       source_var_based_partition_search_method(cpi);
+  } else if (cpi->twopass.gf_group.index) {
+    TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
+    TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
+
+    int tpl_stride = tpl_frame->stride;
+    int64_t intra_cost_base = 0;
+    int64_t mc_dep_cost_base = 0;
+    int row, col;
+
+    for (row = 0; row < cm->mi_rows; ++row) {
+      for (col = 0; col < cm->mi_cols; ++col) {
+        TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
+        intra_cost_base += this_stats->intra_cost;
+        mc_dep_cost_base += this_stats->mc_dep_cost;
+      }
+    }
+
+    cpi->rd.r0 = (double)intra_cost_base / (intra_cost_base + mc_dep_cost_base);
   }
 
   {
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -5431,6 +5431,16 @@
   return overlap_area = width * height;
 }
 
+int round_floor(int ref_pos) {
+  int round;
+  if (ref_pos < 0)
+    round = -(1 + (-ref_pos - 1) / MI_SIZE);
+  else
+    round = ref_pos / MI_SIZE;
+
+  return round;
+}
+
 void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats,
                       int mi_row, int mi_col) {
   TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index];
@@ -5443,8 +5453,8 @@
   int ref_pos_col = mi_col * MI_SIZE + mv_col;
 
   // top-left on grid block location
-  int grid_pos_row_base = (ref_pos_row >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
-  int grid_pos_col_base = (ref_pos_col >> MI_SIZE_LOG2) << MI_SIZE_LOG2;
+  int grid_pos_row_base = round_floor(ref_pos_row) * MI_SIZE;
+  int grid_pos_col_base = round_floor(ref_pos_col) * MI_SIZE;
   int block;
 
   for (block = 0; block < 4; ++block) {
@@ -5455,8 +5465,8 @@
         grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) {
       int overlap_area = get_overlap_area(grid_pos_row, grid_pos_col,
                                           ref_pos_row, ref_pos_col, block);
-      int ref_mi_row = grid_pos_row >> MI_SIZE_LOG2;
-      int ref_mi_col = grid_pos_col >> MI_SIZE_LOG2;
+      int ref_mi_row = round_floor(grid_pos_row);
+      int ref_mi_col = round_floor(grid_pos_col);
 
       int64_t mc_flow = tpl_stats->mc_dep_cost -
                         (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) /
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -299,7 +299,7 @@
   int mi_cols;
 } TplDepFrame;
 
-#define TPL_DEP_COST_SCALE_LOG2 16
+#define TPL_DEP_COST_SCALE_LOG2 4
 
 // TODO(jingning) All spatially adaptive variables should go to TileDataEnc.
 typedef struct TileDataEnc {
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -200,6 +200,38 @@
   return (int)rdmult;
 }
 
+int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
+  const VP9_COMMON *cm = &cpi->common;
+  int64_t q = vp9_dc_quant(cm->base_qindex, 0, cpi->common.bit_depth);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  int64_t rdmult = 0;
+  switch (cpi->common.bit_depth) {
+    case VPX_BITS_8: rdmult = (int)((88 * q * q / beta) / 24); break;
+    case VPX_BITS_10:
+      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 4);
+      break;
+    default:
+      assert(cpi->common.bit_depth == VPX_BITS_12);
+      rdmult = ROUND_POWER_OF_TWO((int)((88 * q * q / beta) / 24), 8);
+      break;
+  }
+#else
+  int64_t rdmult = (int)((88 * q * q / beta) / 24);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+  if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
+    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
+    const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
+    const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
+
+    rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
+    rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
+  }
+  if (rdmult < 1) rdmult = 1;
+  return (int)rdmult;
+}
+
 static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
   double q;
 #if CONFIG_VP9_HIGHBITDEPTH
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -115,6 +115,7 @@
 #endif
   int RDMULT;
   int RDDIV;
+  double r0;
 } RD_OPT;
 
 typedef struct RD_COST {
@@ -137,6 +138,8 @@
                                             int qindex);
 
 int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);
+
+int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta);
 
 void vp9_initialize_rd_consts(struct VP9_COMP *cpi);