shithub: libvpx

Download patch

ref: 566905e91ec746835ffc91b1a87ddd6d352aed08
parent: b358f9076f153835680cd3af3765c7e30474c030
author: angiebird <angiebird@google.com>
date: Sat Aug 1 09:03:46 EDT 2020

Add recode loop logics for rate_ctrl experiment

Change-Id: I4de5a38e25d6b0836d90e8fcd0e56d268e5fd838

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -4201,6 +4201,134 @@
   return VPXMIN(qstep, MAX_QSTEP_ADJ);
 }
 
+#if CONFIG_RATE_CTRL
+#define RATE_CTRL_MAX_RECODE_NUM 7
+
+typedef struct RATE_QINDEX_HISTORY {
+  int recode_count;  // Number of valid entries in the two arrays below.
+  int q_index_history[RATE_CTRL_MAX_RECODE_NUM];  // q index used per encode.
+  int rate_history[RATE_CTRL_MAX_RECODE_NUM];     // Bits produced per encode.
+  int q_index_high;  // Last q index whose bits were <= target (starts at 255).
+  int q_index_low;   // Last q index whose bits were >= target (starts at 0).
+} RATE_QINDEX_HISTORY;
+
+// Resets the history to empty with the widest q index bracket, [0, 255].
+static void init_rq_history(RATE_QINDEX_HISTORY *rq_history) {
+  rq_history->recode_count = rq_history->q_index_low = 0;
+  rq_history->q_index_high = 255;
+}
+
+// Records one encode attempt (q_index -> actual_bits) and narrows the search
+// bracket: a result at or under target_bits moves q_index_high to q_index, a
+// result at or over target_bits moves q_index_low to q_index. No bounds check
+// is done here; recode_count must be below RATE_CTRL_MAX_RECODE_NUM on entry.
+static void update_rq_history(RATE_QINDEX_HISTORY *rq_history, int target_bits,
+                              int actual_bits, int q_index) {
+  const int slot = rq_history->recode_count++;
+  rq_history->q_index_history[slot] = q_index;
+  rq_history->rate_history[slot] = actual_bits;
+  if (actual_bits <= target_bits) rq_history->q_index_high = q_index;
+  if (actual_bits >= target_bits) rq_history->q_index_low = q_index;
+}
+
+// Solves the model equation
+//   target_bits = bias - ratio * log2(q_step)
+// for q_step and converts the result to a q index. rq_model->ratio is used as
+// a divisor and is not checked here.
+// TODO(angiebird): Make this function support highbitdepth.
+static int guess_q_index_from_model(const RATE_QSTEP_MODEL *rq_model,
+                                    int target_bits) {
+  const double log2_q_step = (rq_model->bias - target_bits) / rq_model->ratio;
+  return vp9_convert_q_to_qindex(pow(2.0, log2_q_step), VPX_BITS_8);
+}
+
+// Moves prev_q_index by gap: downwards if the previous encode produced fewer
+// bits than target_bits and upwards otherwise. The result is kept inside
+// [0, 255].
+static int guess_q_index_linear(int prev_q_index, int target_bits,
+                                int actual_bits, int gap) {
+  const int undershoot = actual_bits < target_bits;
+  if (undershoot) {
+    return VPXMAX(prev_q_index - gap, 0);
+  }
+  // actual_bits >= target_bits, including an exact hit.
+  return VPXMIN(prev_q_index + gap, 255);
+}
+
+// Percent gap to target_bits; the divisor is clamped to >= 1 (no divide by 0).
+static double get_bits_percent_diff(int target_bits, int actual_bits) {
+  const int divisor = target_bits < 1 ? 1 : target_bits;
+  return abs(target_bits - actual_bits) * 1. / divisor * 100;
+}
+
+// Picks the q index for the next encode attempt of the current frame.
+// Without recode history the model is used when ready; otherwise -1 is
+// returned so the caller keeps its own q. With history, a miss of more than
+// 50% bisects [q_index_low, q_index_high]; a closer miss uses the model, or a
+// fixed step of 20 from the previous q index when the model is not ready.
+// A real guess is finally pushed inside (q_index_low, q_index_high).
+static int rq_model_predict_q_index(const RATE_QSTEP_MODEL *rq_model,
+                                    const RATE_QINDEX_HISTORY *rq_history,
+                                    int target_bits) {
+  const int low = rq_history->q_index_low;
+  const int high = rq_history->q_index_high;
+  int q_index;
+  if (rq_history->recode_count > 0) {
+    const int last = rq_history->recode_count - 1;
+    const int actual_bits = rq_history->rate_history[last];
+    const int prev_q_index = rq_history->q_index_history[last];
+    if (get_bits_percent_diff(target_bits, actual_bits) > 50) {
+      // Far from the target: binary search converges faster.
+      q_index = (low + high) / 2;
+    } else if (rq_model->ready) {
+      q_index = guess_q_index_from_model(rq_model, target_bits);
+    } else {
+      // TODO(angiebird): Find a better way to set the gap.
+      q_index =
+          guess_q_index_linear(prev_q_index, target_bits, actual_bits, 20);
+    }
+  } else if (rq_model->ready) {
+    q_index = guess_q_index_from_model(rq_model, target_bits);
+  } else {
+    // No basis for a guess. Return the sentinel before the clamps below can
+    // turn it into q_index_low + 1, which the caller would use as a real q.
+    return -1;
+  }
+
+  assert(low <= high);
+  if (q_index <= low) q_index = low + 1;
+  if (q_index >= high) q_index = high - 1;
+  return q_index;
+}
+
+// Refits rq_model (rate = bias - ratio * log2(q_step)) from rq_history.
+// >= 2 encodes: fit the line through the last two (q_step, bits) samples; the
+// fit is dropped if both samples share a q_step or the slope is not positive.
+// 1 encode and a ready model: correct ratio by the miss at that q_step.
+static void rq_model_update(const RATE_QINDEX_HISTORY *rq_history,
+                            int target_bits, RATE_QSTEP_MODEL *rq_model) {
+  const double eps = 0.00001;  // Smallest divisor/slope treated as non-zero.
+  const int *const q = rq_history->q_index_history;
+  const int *const bits = rq_history->rate_history;
+  const int last = rq_history->recode_count - 1;
+  if (last >= 1) {
+    const double lq1 = log2(vp9_convert_qindex_to_q(q[last - 1], VPX_BITS_8));
+    const double lq2 = log2(vp9_convert_qindex_to_q(q[last], VPX_BITS_8));
+    const double rise = (double)bits[last] - bits[last - 1];
+    const double ratio = fabs(lq1 - lq2) > eps ? rise / (lq1 - lq2) : 0;
+    if (ratio > eps) {
+      rq_model->ratio = ratio;
+      rq_model->bias = bits[last - 1] + ratio * lq1;
+      rq_model->ready = 1;
+    }
+  } else if (last == 0 && rq_model->ready) {
+    const double lq = log2(vp9_convert_qindex_to_q(q[0], VPX_BITS_8));
+    // Assumes q[0] was the model's pick for target_bits; re-solve the slope.
+    if (fabs(lq) > eps) rq_model->ratio += (target_bits - bits[0]) / lq;
+  }
+}
+#endif  // CONFIG_RATE_CTRL
+
 static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size,
                                     uint8_t *dest) {
   const VP9EncoderConfig *const oxcf = &cpi->oxcf;
@@ -4220,6 +4348,15 @@
   int qrange_adj = 1;
 #endif
 
+#if CONFIG_RATE_CTRL
+  const FRAME_UPDATE_TYPE update_type =
+      cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
+  const ENCODE_FRAME_TYPE frame_type = get_encode_frame_type(update_type);
+  RATE_QSTEP_MODEL *rq_model = &cpi->rq_model[frame_type];
+  RATE_QINDEX_HISTORY rq_history;
+  init_rq_history(&rq_history);
+#endif  // CONFIG_RATE_CTRL
+
   if (cm->show_existing_frame) {
     rc->this_frame_target = 0;
     if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
@@ -4266,6 +4403,15 @@
       loop_at_this_size = 0;
     }
 
+#if CONFIG_RATE_CTRL
+    {
+      const int suggested_q_index = rq_model_predict_q_index(
+          rq_model, &rq_history, rc->this_frame_target);
+      if (suggested_q_index != -1) {
+        q = suggested_q_index;
+      }
+    }
+#endif  // CONFIG_RATE_CTRL
     // Decide frame size bounds first time through.
     if (loop_count == 0) {
       vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
@@ -4359,7 +4505,28 @@
     if (cpi->encode_command.use_external_quantize_index) {
       break;
     }
-#endif
+
+    if (cpi->encode_command.use_external_target_frame_bits) {
+      const double percent_diff = get_bits_percent_diff(
+          rc->this_frame_target, rc->projected_frame_size);
+      update_rq_history(&rq_history, rc->this_frame_target,
+                        rc->projected_frame_size, q);
+      loop_count += 1;
+
+      rq_model_update(&rq_history, rc->this_frame_target, rq_model);
+
+      // Check if we hit the target bitrate.
+      if (percent_diff <= 15 ||
+          rq_history.recode_count >= RATE_CTRL_MAX_RECODE_NUM ||
+          rq_history.q_index_low >= rq_history.q_index_high) {
+        break;
+      }
+
+      loop = 1;
+      restore_coding_context(cpi);
+      continue;
+    }
+#endif  // CONFIG_RATE_CTRL
 
     if (oxcf->rc_mode == VPX_Q) {
       loop = 0;
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -560,6 +560,41 @@
   return gop_command->show_frame_count + gop_command->use_alt_ref;
 }
 
+// TODO(angiebird): See if we can merge this one with FrameType in
+// simple_encode.h
+typedef enum ENCODE_FRAME_TYPE {
+  ENCODE_FRAME_TYPE_KEY,      // Mapped from KF_UPDATE.
+  ENCODE_FRAME_TYPE_INTER,    // Mapped from LF_UPDATE.
+  ENCODE_FRAME_TYPE_ALTREF,   // Mapped from ARF_UPDATE.
+  ENCODE_FRAME_TYPE_OVERLAY,  // Mapped from OVERLAY_UPDATE.
+  ENCODE_FRAME_TYPE_GOLDEN,   // Mapped from GF_UPDATE.
+  ENCODE_FRAME_TYPES,         // Number of types above; sizes arrays.
+} ENCODE_FRAME_TYPE;
+
+// TODO(angiebird): Merge this function with get_frame_type_from_update_type()
+// Maps a FRAME_UPDATE_TYPE to an ENCODE_FRAME_TYPE; unmapped types abort().
+static INLINE ENCODE_FRAME_TYPE
+get_encode_frame_type(FRAME_UPDATE_TYPE update_type) {
+  switch (update_type) {
+    case KF_UPDATE: return ENCODE_FRAME_TYPE_KEY;
+    case LF_UPDATE: return ENCODE_FRAME_TYPE_INTER;
+    case GF_UPDATE: return ENCODE_FRAME_TYPE_GOLDEN;
+    case ARF_UPDATE: return ENCODE_FRAME_TYPE_ALTREF;
+    case OVERLAY_UPDATE: return ENCODE_FRAME_TYPE_OVERLAY;
+    default: fprintf(stderr, "Unsupported update_type %d\n", update_type);
+  }
+  abort();
+  return ENCODE_FRAME_TYPE_INTER;  // Not reached; abort() does not return.
+}
+
+typedef struct RATE_QSTEP_MODEL {
+  // The rq model predicts the bit usage as follows.
+  // rate = bias - ratio * log2(q_step)
+  int ready;     // Non-zero once bias and ratio hold a fitted model.
+  double bias;   // Constant term of the rate equation above.
+  double ratio;  // Factor applied to log2(q_step) in the rate equation above.
+} RATE_QSTEP_MODEL;
+
 typedef struct ENCODE_COMMAND {
   int use_external_quantize_index;
   int external_quantize_index;
@@ -917,6 +952,8 @@
   ENCODE_COMMAND encode_command;
   PARTITION_INFO *partition_info;
   MOTION_VECTOR_INFO *motion_vector_info;
+
+  RATE_QSTEP_MODEL rq_model[ENCODE_FRAME_TYPES];
 #endif
 } VP9_COMP;