shithub: libvpx

Download patch

ref: d0567bd779febe995020668cc7f6c1193e3e41d6
parent: ebefb90b75f07ea5ab06d6b2a5ea5355c843d266
author: Paul Wilkins <paulwilkins@google.com>
date: Wed Mar 3 11:45:42 EST 2021

Add fields into RC for Vizier ML experiments.

This patch adds fields to the RC data structure for the Vizier project.

The added fields allow control of some extra rate control parameters
and rate distortion.

This patch also adds functions to initialize the various parameters,
though many are not yet used / wired in and, for now, all are set to
default values. Ultimately, many will be set through new command-line
options.

Change-Id: I41591bb627d3837d2104fb363845adedbddf2e02

--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2317,6 +2317,7 @@
   cpi->frame_info = vp9_get_frame_info(oxcf);
 
   vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
+  vp9_init_rd_parameters(cpi);
 
   init_frame_indexes(cm);
   cpi->partition_search_skippable_frame = 0;
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -746,6 +746,7 @@
   // Ambient reconstruction err target for force key frames
   int64_t ambient_err;
 
+  RD_CONTROL rd_ctrl;
   RD_OPT rd;
 
   CODING_CONTEXT coding_context;
--- a/vp9/encoder/vp9_ext_ratectrl.h
+++ b/vp9/encoder/vp9_ext_ratectrl.h
@@ -43,6 +43,6 @@
     EXT_RATECTRL *ext_ratectrl, int64_t bit_count,
     const YV12_BUFFER_CONFIG *source_frame,
     const YV12_BUFFER_CONFIG *coded_frame, uint32_t bit_depth,
-    uint32_t input_bit_depth, int actual_encoding_qindex);
+    uint32_t input_bit_depth, const int actual_encoding_qindex);
 
 #endif  // VPX_VP9_ENCODER_VP9_EXT_RATECTRL_H_
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -54,6 +54,31 @@
 #define NCOUNT_INTRA_THRESH 8192
 #define NCOUNT_INTRA_FACTOR 3
 
+#define SR_DIFF_PART 0.0015
+#define INTRA_PART 0.005
+#define DEFAULT_DECAY_LIMIT 0.75
+#define LOW_SR_DIFF_TRHESH 0.1
+#define SR_DIFF_MAX 128.0
+#define LOW_CODED_ERR_PER_MB 10.0
+#define NCOUNT_FRAME_II_THRESH 6.0
+#define BASELINE_ERR_PER_MB 12500.0
+#define GF_MAX_FRAME_BOOST 96.0
+
+#ifdef AGGRESSIVE_VBR
+#define KF_MAX_FRAME_BOOST 80.0
+#define MAX_KF_TOT_BOOST 4800
+#else
+#define KF_MAX_FRAME_BOOST 96.0
+#define MAX_KF_TOT_BOOST 5400
+#endif
+
+#define ZM_POWER_FACTOR 0.75
+#define MINQ_ADJ_LIMIT 48
+#define MINQ_ADJ_LIMIT_CQ 20
+#define HIGH_UNDERSHOOT_RATIO 2
+#define AV_WQ_FACTOR 4.0
+#define DEF_EPMB_LOW 2000.0
+
 #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001)
 
 #if ARF_STATS_OUTPUT
@@ -1807,14 +1832,6 @@
   twopass->arnr_strength_adjustment = 0;
 }
 
-#define SR_DIFF_PART 0.0015
-#define INTRA_PART 0.005
-#define DEFAULT_DECAY_LIMIT 0.75
-#define LOW_SR_DIFF_TRHESH 0.1
-#define SR_DIFF_MAX 128.0
-#define LOW_CODED_ERR_PER_MB 10.0
-#define NCOUNT_FRAME_II_THRESH 6.0
-
 static double get_sr_decay_rate(const FRAME_INFO *frame_info,
                                 const FIRSTPASS_STATS *frame) {
   double sr_diff = (frame->sr_coded_error - frame->coded_error);
@@ -1853,8 +1870,6 @@
   return VPXMIN(sr_decay, zero_motion_pct);
 }
 
-#define ZM_POWER_FACTOR 0.75
-
 static double get_prediction_decay_rate(const FRAME_INFO *frame_info,
                                         const FIRSTPASS_STATS *frame_stats) {
   const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats);
@@ -1942,8 +1957,6 @@
   }
 }
 
-#define BASELINE_ERR_PER_MB 12500.0
-#define GF_MAX_BOOST 96.0
 static double calc_frame_boost(const FRAME_INFO *frame_info,
                                const FIRSTPASS_STATS *this_frame,
                                int avg_frame_qindex,
@@ -1965,7 +1978,7 @@
   // Q correction and scalling
   frame_boost = frame_boost * boost_q_correction;
 
-  return VPXMIN(frame_boost, GF_MAX_BOOST * boost_q_correction);
+  return VPXMIN(frame_boost, GF_MAX_FRAME_BOOST * boost_q_correction);
 }
 
 static double kf_err_per_mb(VP9_COMP *cpi) {
@@ -3159,14 +3172,6 @@
 #define MIN_SCAN_FRAMES_FOR_KF_BOOST 32
 #define KF_ABS_ZOOM_THRESH 6.0
 
-#ifdef AGGRESSIVE_VBR
-#define KF_MAX_FRAME_BOOST 80.0
-#define MAX_KF_TOT_BOOST 4800
-#else
-#define KF_MAX_FRAME_BOOST 96.0
-#define MAX_KF_TOT_BOOST 5400
-#endif
-
 int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf,
                                const FRAME_INFO *frame_info,
                                const FIRST_PASS_INFO *first_pass_info,
@@ -3470,6 +3475,113 @@
           twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
 }
 
+// Configure image size specific Vizier parameters.
+// Later these will be set via additional command line options.
+static void init_vizier_params(RATE_CONTROL *const rc, int screen_area) {
+  if (1) {
+    // Force defaults for now
+    rc->active_wq_factor = AV_WQ_FACTOR;
+    rc->base_err_per_mb = BASELINE_ERR_PER_MB;
+    rc->sr_default_decay_limit = DEFAULT_DECAY_LIMIT;
+    rc->sr_diff_part = SR_DIFF_PART;
+    rc->gf_frame_max_boost = GF_MAX_FRAME_BOOST;
+    rc->gf_max_total_boost = MAX_GF_BOOST;
+    rc->kf_err_per_mb = DEF_EPMB_LOW;
+    rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;     // Max for first kf.
+    rc->kf_frame_max_boost_subs = KF_MAX_FRAME_BOOST / 2;  // Max for other kfs.
+    rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+    rc->zm_power_factor = ZM_POWER_FACTOR;
+  } else {
+    // Vizier experimental parameters from training.
+    // Later these will be set via the command line.
+    if (screen_area <= 176 * 144) {
+      rc->active_wq_factor = 46.0;
+      rc->base_err_per_mb = 37597.399760969536;
+      rc->sr_default_decay_limit = 0.3905639800962774;
+      rc->sr_diff_part = 0.009599023654146284;
+      rc->gf_frame_max_boost = 87.27362648627846;
+      rc->gf_max_total_boost = MAX_GF_BOOST;
+      rc->kf_err_per_mb = 1854.8255436877148;
+      rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+      rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2;
+      rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+      rc->zm_power_factor = 2.93715229184991;
+    } else if (screen_area <= 320 * 240) {
+      rc->active_wq_factor = 55.0;
+      rc->base_err_per_mb = 34525.33177195309;
+      rc->sr_default_decay_limit = 0.23901360046804604;
+      rc->sr_diff_part = 0.008581014394766773;
+      rc->gf_frame_max_boost = 127.34978204980285;
+      rc->gf_max_total_boost = MAX_GF_BOOST;
+      rc->kf_err_per_mb = 723.8337508755031;
+      rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+      rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2;
+      rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+      rc->zm_power_factor = 3.5299221493593413;
+    } else if (screen_area <= 640 * 360) {
+      rc->active_wq_factor = 12.5;
+      rc->base_err_per_mb = 18823.978018028298;
+      rc->sr_default_decay_limit = 0.6043527690301296;
+      rc->sr_diff_part = 0.00343296783885544;
+      rc->gf_frame_max_boost = 75.17672317013668;
+      rc->gf_max_total_boost = MAX_GF_BOOST;
+      rc->kf_err_per_mb = 422.2871502380377;
+      rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+      rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2;
+      rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+      rc->zm_power_factor = 2.265742666649307;
+    } else if (screen_area <= 854 * 480) {
+      rc->active_wq_factor = 51.5;
+      rc->base_err_per_mb = 33718.98307662595;
+      rc->sr_default_decay_limit = 0.33633414970713393;
+      rc->sr_diff_part = 0.00868988716928333;
+      rc->gf_frame_max_boost = 85.2868528581522;
+      rc->gf_max_total_boost = MAX_GF_BOOST;
+      rc->kf_err_per_mb = 1513.4883914008383;
+      rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+      rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2;
+      rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+      rc->zm_power_factor = 3.552278528517416;
+    } else if (screen_area <= 1280 * 720) {
+      rc->active_wq_factor = 41.5;
+      rc->base_err_per_mb = 29527.46375825401;
+      rc->sr_default_decay_limit = 0.5009117586299728;
+      rc->sr_diff_part = 0.005007364627260114;
+      rc->gf_frame_max_boost = GF_MAX_FRAME_BOOST;
+      rc->gf_max_total_boost = MAX_GF_BOOST;
+      rc->kf_err_per_mb = 998.6342911785146;
+      rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+      rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2;
+      rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+      rc->zm_power_factor = 2.568627575572356;
+    } else if (screen_area <= 1920 * 1080) {
+      rc->active_wq_factor = 31.0;
+      rc->base_err_per_mb = 34474.723463367416;
+      rc->sr_default_decay_limit = 0.23346886902707745;
+      rc->sr_diff_part = 0.011431716637966029;
+      rc->gf_frame_max_boost = 81.00472969483079;
+      rc->gf_max_total_boost = MAX_GF_BOOST;
+      rc->kf_err_per_mb = 35931.25734431429;
+      rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+      rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2;
+      rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+      rc->zm_power_factor = 5.5776463538431935;
+    } else {
+      rc->active_wq_factor = AV_WQ_FACTOR;
+      rc->base_err_per_mb = BASELINE_ERR_PER_MB;
+      rc->sr_default_decay_limit = DEFAULT_DECAY_LIMIT;
+      rc->sr_diff_part = SR_DIFF_PART;
+      rc->gf_frame_max_boost = GF_MAX_FRAME_BOOST;
+      rc->gf_max_total_boost = MAX_GF_BOOST;
+      rc->kf_err_per_mb = DEF_EPMB_LOW;
+      rc->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+      rc->kf_frame_max_boost_subs = rc->kf_frame_max_boost_first / 2;
+      rc->kf_max_total_boost = MAX_KF_TOT_BOOST;
+      rc->zm_power_factor = ZM_POWER_FACTOR;
+    }
+  }
+}
+
 void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
   VP9_COMMON *const cm = &cpi->common;
   RATE_CONTROL *const rc = &cpi->rc;
@@ -3480,6 +3592,13 @@
 
   if (!twopass->stats_in) return;
 
+  // Configure image size specific Vizier parameters
+  if (cm->current_video_frame == 0) {
+    unsigned int screen_area = (cm->width * cm->height);
+
+    init_vizier_params(rc, screen_area);
+  }
+
   // If this is an arf frame then we dont want to read the stats file or
   // advance the input pointer as we already have what we need.
   if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
@@ -3605,9 +3724,6 @@
   subtract_stats(&twopass->total_left_stats, &this_frame);
 }
 
-#define MINQ_ADJ_LIMIT 48
-#define MINQ_ADJ_LIMIT_CQ 20
-#define HIGH_UNDERSHOOT_RATIO 2
 void vp9_twopass_postencode_update(VP9_COMP *cpi) {
   TWO_PASS *const twopass = &cpi->twopass;
   RATE_CONTROL *const rc = &cpi->rc;
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -204,6 +204,19 @@
   int preserve_arf_as_gld;
   int preserve_next_arf_as_gld;
   int show_arf_as_gld;
+
+  // Vizier project experimental rate control parameters.
+  double active_wq_factor;
+  double base_err_per_mb;
+  double sr_default_decay_limit;
+  double sr_diff_part;
+  double kf_frame_max_boost_first;  // Max for first kf in a chunk.
+  double kf_frame_max_boost_subs;   // Max for subsequent mid chunk kfs.
+  double kf_max_total_boost;
+  double kf_err_per_mb;
+  double gf_frame_max_boost;
+  double gf_max_total_boost;
+  double zm_power_factor;
 } RATE_CONTROL;
 
 struct VP9_COMP;
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -197,28 +197,99 @@
 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                               128, 144, 144 };
 
+// Configure Vizier RD parameters.
+// Later this function will use passed in command line values.
+void vp9_init_rd_parameters(VP9_COMP *cpi) {
+  RD_CONTROL *const rdc = &cpi->rd_ctrl;
+  unsigned int screen_area = (cpi->common.width * cpi->common.height);
+
+  // Make sure this function is floating point safe.
+  vpx_clear_system_state();
+
+  if (1) {
+    // Non/pre-Vizier defaults
+    rdc->rd_mult_q_sq_inter_low_qp = 4.0;
+    rdc->rd_mult_q_sq_inter_mid_qp = 4.5;
+    rdc->rd_mult_q_sq_inter_high_qp = 3.0;
+    rdc->rd_mult_q_sq_key_ultralow_qp = 4.0;
+    rdc->rd_mult_q_sq_key_low_qp = 3.5;
+    rdc->rd_mult_q_sq_key_mid_qp = 4.5;
+    rdc->rd_mult_q_sq_key_high_qp = 7.5;
+  } else if (screen_area <= 176 * 144) {
+    rdc->rd_mult_q_sq_inter_high_qp = 4.295745965132044;
+    rdc->rd_mult_q_sq_inter_low_qp = 4.0718581295922025;
+    rdc->rd_mult_q_sq_inter_mid_qp = 4.031435609256739;
+    rdc->rd_mult_q_sq_key_low_qp = 5.7037775720838155;
+    rdc->rd_mult_q_sq_key_mid_qp = 4.72424015517201;
+    rdc->rd_mult_q_sq_key_ultralow_qp = 4.290774097327333;
+  } else if (screen_area <= 320 * 240) {
+    rdc->rd_mult_q_sq_inter_high_qp = 4.388244213131458;
+    rdc->rd_mult_q_sq_inter_low_qp = 4.506676356706102;
+    rdc->rd_mult_q_sq_inter_mid_qp = 4.489349899621181;
+    rdc->rd_mult_q_sq_key_low_qp = 4.497000582319771;
+    rdc->rd_mult_q_sq_key_mid_qp = 4.2825894884789735;
+    rdc->rd_mult_q_sq_key_ultralow_qp = 4.217074424696166;
+  } else if (screen_area <= 640 * 360) {
+    rdc->rd_mult_q_sq_inter_high_qp = 4.3702861603380025;
+    rdc->rd_mult_q_sq_inter_low_qp = 4.730644123689013;
+    rdc->rd_mult_q_sq_inter_mid_qp = 4.314589509578551;
+    rdc->rd_mult_q_sq_key_low_qp = 6.068652999601526;
+    rdc->rd_mult_q_sq_key_mid_qp = 4.817707474077241;
+    rdc->rd_mult_q_sq_key_ultralow_qp = 4.576902541873747;
+  } else if (screen_area <= 854 * 480) {
+    rdc->rd_mult_q_sq_inter_high_qp = 3.969083125219539;
+    rdc->rd_mult_q_sq_inter_low_qp = 4.811470143416073;
+    rdc->rd_mult_q_sq_inter_mid_qp = 4.621618127750201;
+    rdc->rd_mult_q_sq_key_low_qp = 5.073157238799473;
+    rdc->rd_mult_q_sq_key_mid_qp = 5.7587672849242635;
+    rdc->rd_mult_q_sq_key_ultralow_qp = 4.9854544277222566;
+  } else if (screen_area <= 1280 * 720) {
+    rdc->rd_mult_q_sq_inter_high_qp = 4.410712348825541;
+    rdc->rd_mult_q_sq_inter_low_qp = 5.119381136011107;
+    rdc->rd_mult_q_sq_inter_mid_qp = 4.518613675766538;
+    rdc->rd_mult_q_sq_key_low_qp = 5.848703119971484;
+    rdc->rd_mult_q_sq_key_mid_qp = 5.368947246228739;
+    rdc->rd_mult_q_sq_key_ultralow_qp = 3.9468491666607326;
+  } else if (screen_area <= 1920 * 1080) {
+    rdc->rd_mult_q_sq_inter_high_qp = 3.2141187537667797;
+    rdc->rd_mult_q_sq_inter_low_qp = 6.00569815296199;
+    rdc->rd_mult_q_sq_inter_mid_qp = 3.932565684947023;
+    rdc->rd_mult_q_sq_key_low_qp = 10.582906599488298;
+    rdc->rd_mult_q_sq_key_mid_qp = 6.274162346360692;
+    rdc->rd_mult_q_sq_key_ultralow_qp = 4.399795006320089;
+  }
+}
+
 int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
-  // largest dc_quant is 21387, therefore rdmult should always fit in int32_t
+  const RD_CONTROL *rdc = &cpi->rd_ctrl;
   const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
-  uint32_t rdmult = q * q;
+  // largest dc_quant is 21387, therefore rdmult should fit in int32_t
+  int rdmult = q * q;
 
+  // Make sure this function is floating point safe.
+  vpx_clear_system_state();
+
   if (cpi->common.frame_type != KEY_FRAME) {
-    if (qindex < 128)
-      rdmult = rdmult * 4;
-    else if (qindex < 190)
-      rdmult = rdmult * 4 + rdmult / 2;
-    else
-      rdmult = rdmult * 3;
+    if (qindex < 128) {
+      rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_inter_low_qp);
+    } else if (qindex < 190) {
+      rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_inter_mid_qp);
+    } else {
+      rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_inter_high_qp);
+    }
   } else {
-    if (qindex < 64)
-      rdmult = rdmult * 4;
-    else if (qindex <= 128)
-      rdmult = rdmult * 3 + rdmult / 2;
-    else if (qindex < 190)
-      rdmult = rdmult * 4 + rdmult / 2;
-    else
-      rdmult = rdmult * 7 + rdmult / 2;
+    if (qindex < 64) {
+      rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_ultralow_qp);
+    } else if (qindex <= 128) {
+      rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_low_qp);
+    } else if (qindex < 190) {
+      rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_mid_qp);
+
+    } else {
+      rdmult = (int)((double)rdmult * rdc->rd_mult_q_sq_key_high_qp);
+    }
   }
+
 #if CONFIG_VP9_HIGHBITDEPTH
   switch (cpi->common.bit_depth) {
     case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -101,6 +101,18 @@
   THR_INTRA,
 } THR_MODES_SUB8X8;
 
+typedef struct {
+  // RD control parameters
+  // Added for Vizier project.
+  double rd_mult_q_sq_inter_low_qp;
+  double rd_mult_q_sq_inter_mid_qp;
+  double rd_mult_q_sq_inter_high_qp;
+  double rd_mult_q_sq_key_ultralow_qp;
+  double rd_mult_q_sq_key_low_qp;
+  double rd_mult_q_sq_key_mid_qp;
+  double rd_mult_q_sq_key_high_qp;
+} RD_CONTROL;
+
 typedef struct RD_OPT {
   // Thresh_mult is used to set a threshold for the rd score. A higher value
   // means that we will accept the best mode so far more often. This number
@@ -143,6 +155,8 @@
 struct TileDataEnc;
 struct VP9_COMP;
 struct macroblock;
+
+void vp9_init_rd_parameters(struct VP9_COMP *cpi);
 
 int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int qindex);