shithub: libvpx

Download patch

ref: b5e754a840511c9956c033561955745a184495cb
parent: cbc4ead58691c899262513025a83f03f5932e50e
author: Paul Wilkins <paulwilkins@google.com>
date: Wed Mar 10 09:39:43 EST 2021

Change SR_diff calculation and representation

This patch changes the way prediction decay is calculated.

We expect that frames that are further from an ALT-REF frame (or Golden
Frame) will be less well predicted by that ALT-REF frame. As such it is
desirable that they should contribute less to the boost calculation used
to assign bits to the ALT-REF.

This code looks at the reduction in prediction quality between the last
frame and the second reference frame (usually two frames old). We make
the assumption that we can accumulate this to get a proxy for the likely
loss of prediction quality over multiple frames.

Previously the calculation looked at the absolute difference in the
coded errors. The issue here is that the meaning of a unit difference
is not the same for very complex frames as it is for easy frames.

In this patch we scale the decay value based on how the error difference
compares to the overall frame complexity as represented by the intra
coding error.

This was tuned experimentally to give test results that were
approximately neutral for our various test sets. There was a slight
drop in Overall PSNR but a consistent improvement in SSIM. This balance
may be improved with further tuning, as it is noteworthy that it was
much better on the hd_res set.

Results (Overall PSNR, SSIM; -ve is better) for low_res, ugc360, midres2,
ugc480P and hd_res are as follows:

0.173	-0.688
0.118	-0.153
0.132	-0.239
0.261	-0.405
-0.305	-1.109

As part of this adjustment the contribution of motion amplitude was
removed.

This patch also changes the control mechanism that will be exposed
on the command line for use by the Vizier project. The control is now
a linear factor which defaults to 1.0, where values < 1.0 mean a lower
decay rate and values > 1.0 mean an increased decay rate.

This presents a more easily understandable interface for use in
optimizing the decay behavior for various formats, where it is clear
what a passed in value means relative to the default.

With the new decay mechanism the current values for various formats
are almost certainly wrong and we still need to define sensible upper
and lower bounds for use during future training.

Change-Id: Ib1074bbea97c725cdbf25772ee8ed66831461ce3

--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -58,7 +58,6 @@
 #define INTRA_PART 0.005
 #define DEFAULT_DECAY_LIMIT 0.75
 #define LOW_SR_DIFF_TRHESH 0.1
-#define SR_DIFF_MAX 128.0
 #define LOW_CODED_ERR_PER_MB 10.0
 #define NCOUNT_FRAME_II_THRESH 6.0
 #define BASELINE_ERR_PER_MB 12500.0
@@ -1833,17 +1832,21 @@
   twopass->arnr_strength_adjustment = 0;
 }
 
-static double get_sr_decay_rate(const FRAME_INFO *frame_info,
-                                const TWO_PASS *const twopass,
+/* This function considers how the quality of prediction may be deteriorating
+ * with distance. It compares the coded error for the last frame and the
+ * second reference frame (usually two frames old) and also applies a factor
+ * based on the extent of INTRA coding.
+ *
+ * The decay factor is then used to reduce the contribution of frames further
+ * from the alt-ref or golden frame, to the bitframe boost calculation for that
+ * alt-ref or golden frame.
+ */
+static double get_sr_decay_rate(const TWO_PASS *const twopass,
                                 const FIRSTPASS_STATS *frame) {
   double sr_diff = (frame->sr_coded_error - frame->coded_error);
   double sr_decay = 1.0;
   double modified_pct_inter;
   double modified_pcnt_intra;
-  const double motion_amplitude_part =
-      frame->pcnt_motion *
-      ((frame->mvc_abs + frame->mvr_abs) /
-       (frame_info->frame_height + frame_info->frame_width));
 
   modified_pct_inter = frame->pcnt_inter;
   if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&
@@ -1855,9 +1858,9 @@
   modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);
 
   if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
-    sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX);
-    sr_decay = 1.0 - (twopass->sr_diff_part * sr_diff) - motion_amplitude_part -
-               (INTRA_PART * modified_pcnt_intra);
+    double sr_diff_part =
+        twopass->sr_diff_factor * ((sr_diff * 0.25) / frame->intra_error);
+    sr_decay = 1.0 - sr_diff_part - (INTRA_PART * modified_pcnt_intra);
   }
   return VPXMAX(sr_decay, twopass->sr_default_decay_limit);
 }
@@ -1864,20 +1867,17 @@
 
 // This function gives an estimate of how badly we believe the prediction
 // quality is decaying from frame to frame.
-static double get_zero_motion_factor(const FRAME_INFO *frame_info,
-                                     const TWO_PASS *const twopass,
+static double get_zero_motion_factor(const TWO_PASS *const twopass,
                                      const FIRSTPASS_STATS *frame_stats) {
   const double zero_motion_pct =
       frame_stats->pcnt_inter - frame_stats->pcnt_motion;
-  double sr_decay = get_sr_decay_rate(frame_info, twopass, frame_stats);
+  double sr_decay = get_sr_decay_rate(twopass, frame_stats);
   return VPXMIN(sr_decay, zero_motion_pct);
 }
 
-static double get_prediction_decay_rate(const FRAME_INFO *frame_info,
-                                        const TWO_PASS *const twopass,
+static double get_prediction_decay_rate(const TWO_PASS *const twopass,
                                         const FIRSTPASS_STATS *frame_stats) {
-  const double sr_decay_rate =
-      get_sr_decay_rate(frame_info, twopass, frame_stats);
+  const double sr_decay_rate = get_sr_decay_rate(twopass, frame_stats);
   const double zero_motion_factor =
       (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion),
                   twopass->zm_power_factor));
@@ -2066,8 +2066,7 @@
 
     // Accumulate the effect of prediction quality decay.
     if (!flash_detected) {
-      decay_accumulator *=
-          get_prediction_decay_rate(frame_info, twopass, this_frame);
+      decay_accumulator *= get_prediction_decay_rate(twopass, this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                               ? MIN_DECAY_FACTOR
                               : decay_accumulator;
@@ -2107,8 +2106,7 @@
 
     // Cumulative effect of prediction quality decay.
     if (!flash_detected) {
-      decay_accumulator *=
-          get_prediction_decay_rate(frame_info, twopass, this_frame);
+      decay_accumulator *= get_prediction_decay_rate(twopass, this_frame);
       decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                               ? MIN_DECAY_FACTOR
                               : decay_accumulator;
@@ -2606,16 +2604,14 @@
 
     // Monitor for static sections.
     if ((rc->frames_since_key + gop_coding_frames - 1) > 1) {
-      zero_motion_accumulator =
-          VPXMIN(zero_motion_accumulator,
-                 get_zero_motion_factor(frame_info, twopass, next_frame));
+      zero_motion_accumulator = VPXMIN(
+          zero_motion_accumulator, get_zero_motion_factor(twopass, next_frame));
     }
 
     // Accumulate the effect of prediction quality decay.
     if (!flash_detected) {
       double last_loop_decay_rate = loop_decay_rate;
-      loop_decay_rate =
-          get_prediction_decay_rate(frame_info, twopass, next_frame);
+      loop_decay_rate = get_prediction_decay_rate(twopass, next_frame);
 
       // Break clause to detect very still sections after motion. For example,
       // a static image after a fade or other transition.
@@ -3181,7 +3177,6 @@
 #define KF_ABS_ZOOM_THRESH 6.0
 
 int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf,
-                               const FRAME_INFO *frame_info,
                                const TWO_PASS *const twopass, int kf_show_idx,
                                int min_gf_interval) {
   const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
@@ -3211,8 +3206,7 @@
           break;
 
         // How fast is the prediction quality decaying?
-        loop_decay_rate =
-            get_prediction_decay_rate(frame_info, twopass, next_frame);
+        loop_decay_rate = get_prediction_decay_rate(twopass, next_frame);
 
         // We want to know something about the recent past... rather than
         // as used elsewhere where we are concerned with decay in prediction
@@ -3298,8 +3292,8 @@
   kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats,
                                      mean_mod_score, av_err);
 
-  rc->frames_to_key = vp9_get_frames_to_next_key(
-      oxcf, frame_info, twopass, kf_show_idx, rc->min_gf_interval);
+  rc->frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, kf_show_idx,
+                                                 rc->min_gf_interval);
 
   // If there is a max kf interval set by the user we must obey it.
   // We already breakout of the loop above at 2x max.
@@ -3379,9 +3373,9 @@
       // Monitor for static sections.
       // First frame in kf group the second ref indicator is invalid.
       if (i > 0) {
-        zero_motion_accumulator = VPXMIN(
-            zero_motion_accumulator,
-            get_zero_motion_factor(&cpi->frame_info, twopass, &next_frame));
+        zero_motion_accumulator =
+            VPXMIN(zero_motion_accumulator,
+                   get_zero_motion_factor(twopass, &next_frame));
       } else {
         zero_motion_accumulator =
             next_frame.pcnt_inter - next_frame.pcnt_motion;
@@ -3493,7 +3487,7 @@
     twopass->active_wq_factor = AV_WQ_FACTOR;
     twopass->base_err_per_mb = BASELINE_ERR_PER_MB;
     twopass->sr_default_decay_limit = DEFAULT_DECAY_LIMIT;
-    twopass->sr_diff_part = SR_DIFF_PART;
+    twopass->sr_diff_factor = 1.0;
     twopass->gf_frame_max_boost = GF_MAX_FRAME_BOOST;
     twopass->gf_max_total_boost = MAX_GF_BOOST;
     if (screen_area < 1280 * 720) {
@@ -3515,7 +3509,7 @@
       twopass->active_wq_factor = 46.0;
       twopass->base_err_per_mb = 37597.399760969536;
       twopass->sr_default_decay_limit = 0.3905639800962774;
-      twopass->sr_diff_part = 0.009599023654146284;
+      twopass->sr_diff_factor = 6.4;
       twopass->gf_frame_max_boost = 87.27362648627846;
       twopass->gf_max_total_boost = MAX_GF_BOOST;
       twopass->kf_err_per_mb = 1854.8255436877148;
@@ -3528,7 +3522,7 @@
       twopass->active_wq_factor = 55.0;
       twopass->base_err_per_mb = 34525.33177195309;
       twopass->sr_default_decay_limit = 0.23901360046804604;
-      twopass->sr_diff_part = 0.008581014394766773;
+      twopass->sr_diff_factor = 5.73;
       twopass->gf_frame_max_boost = 127.34978204980285;
       twopass->gf_max_total_boost = MAX_GF_BOOST;
       twopass->kf_err_per_mb = 723.8337508755031;
@@ -3541,7 +3535,7 @@
       twopass->active_wq_factor = 12.5;
       twopass->base_err_per_mb = 18823.978018028298;
       twopass->sr_default_decay_limit = 0.6043527690301296;
-      twopass->sr_diff_part = 0.00343296783885544;
+      twopass->sr_diff_factor = 2.28;
       twopass->gf_frame_max_boost = 75.17672317013668;
       twopass->gf_max_total_boost = MAX_GF_BOOST;
       twopass->kf_err_per_mb = 422.2871502380377;
@@ -3554,7 +3548,7 @@
       twopass->active_wq_factor = 51.5;
       twopass->base_err_per_mb = 33718.98307662595;
       twopass->sr_default_decay_limit = 0.33633414970713393;
-      twopass->sr_diff_part = 0.00868988716928333;
+      twopass->sr_diff_factor = 5.8;
       twopass->gf_frame_max_boost = 85.2868528581522;
       twopass->gf_max_total_boost = MAX_GF_BOOST;
       twopass->kf_err_per_mb = 1513.4883914008383;
@@ -3567,7 +3561,7 @@
       twopass->active_wq_factor = 41.5;
       twopass->base_err_per_mb = 29527.46375825401;
       twopass->sr_default_decay_limit = 0.5009117586299728;
-      twopass->sr_diff_part = 0.005007364627260114;
+      twopass->sr_diff_factor = 3.33;
       twopass->gf_frame_max_boost = 81.00472969483079;
       twopass->gf_max_total_boost = MAX_GF_BOOST;
       twopass->kf_err_per_mb = 998.6342911785146;
@@ -3580,7 +3574,7 @@
       twopass->active_wq_factor = 31.0;
       twopass->base_err_per_mb = 34474.723463367416;
       twopass->sr_default_decay_limit = 0.23346886902707745;
-      twopass->sr_diff_part = 0.011431716637966029;
+      twopass->sr_diff_factor = 7.6;
       twopass->gf_frame_max_boost = 213.2940230360479;
       twopass->gf_max_total_boost = MAX_GF_BOOST;
       twopass->kf_err_per_mb = 35931.25734431429;
@@ -3873,9 +3867,8 @@
 
   *first_is_key_frame = 0;
   if (rc.frames_to_key == 0) {
-    rc.frames_to_key =
-        vp9_get_frames_to_next_key(&cpi->oxcf, &cpi->frame_info, twopass,
-                                   *first_show_idx, rc.min_gf_interval);
+    rc.frames_to_key = vp9_get_frames_to_next_key(
+        &cpi->oxcf, twopass, *first_show_idx, rc.min_gf_interval);
     rc.frames_since_key = 0;
     *first_is_key_frame = 1;
   }
@@ -3939,8 +3932,8 @@
     int use_alt_ref;
     int first_is_key_frame = 0;
     if (rc.frames_to_key == 0) {
-      rc.frames_to_key = vp9_get_frames_to_next_key(
-          oxcf, frame_info, twopass, show_idx, rc.min_gf_interval);
+      rc.frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, show_idx,
+                                                    rc.min_gf_interval);
       rc.frames_since_key = 0;
       first_is_key_frame = 1;
     }
@@ -3961,7 +3954,6 @@
 }
 
 void vp9_get_key_frame_map(const VP9EncoderConfig *oxcf,
-                           const FRAME_INFO *frame_info,
                            const TWO_PASS *const twopass, int *key_frame_map) {
   int show_idx = 0;
   RATE_CONTROL rc;
@@ -3975,8 +3967,8 @@
   while (show_idx < first_pass_info->num_frames) {
     int key_frame_group_size;
     key_frame_map[show_idx] = 1;
-    key_frame_group_size = vp9_get_frames_to_next_key(
-        oxcf, frame_info, twopass, show_idx, rc.min_gf_interval);
+    key_frame_group_size =
+        vp9_get_frames_to_next_key(oxcf, twopass, show_idx, rc.min_gf_interval);
     assert(key_frame_group_size > 0);
     show_idx += key_frame_group_size;
   }
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -226,7 +226,7 @@
   double active_wq_factor;
   double base_err_per_mb;
   double sr_default_decay_limit;
-  double sr_diff_part;
+  double sr_diff_factor;
   double kf_err_per_mb;
   double kf_frame_min_boost;
   double kf_frame_max_boost_first;  // Max for first kf in a chunk.
@@ -262,7 +262,6 @@
 
 struct VP9EncoderConfig;
 int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf,
-                               const FRAME_INFO *frame_info,
                                const TWO_PASS *const twopass, int kf_show_idx,
                                int min_gf_interval);
 #if CONFIG_RATE_CTRL
@@ -311,7 +310,6 @@
  * number of show frames in the video.
  */
 void vp9_get_key_frame_map(const struct VP9EncoderConfig *oxcf,
-                           const FRAME_INFO *frame_info,
                            const FIRST_PASS_INFO *first_pass_info,
                            int *key_frame_map);
 #endif  // CONFIG_RATE_CTRL
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -201,14 +201,60 @@
 // Later this function will use passed in command line values.
 void vp9_init_rd_parameters(VP9_COMP *cpi) {
   RD_CONTROL *const rdc = &cpi->rd_ctrl;
-  unsigned int screen_area = (cpi->common.width * cpi->common.height);
 
   // Make sure this function is floating point safe.
   vpx_clear_system_state();
 
   rdc->rd_mult_q_sq_key_high_qp = 7.5;  // No defined Vizer values yet
-  if (1) {
-    // Non/pre-Vizer defaults
+
+  if (0) {
+    unsigned int screen_area = (cpi->common.width * cpi->common.height);
+
+    if (screen_area <= 176 * 144) {
+      rdc->rd_mult_q_sq_inter_low_qp = 4.0718581295922025;
+      rdc->rd_mult_q_sq_inter_mid_qp = 4.031435609256739;
+      rdc->rd_mult_q_sq_inter_high_qp = 4.295745965132044;
+      rdc->rd_mult_q_sq_key_ultralow_qp = 4.290774097327333;
+      rdc->rd_mult_q_sq_key_low_qp = 5.7037775720838155;
+      rdc->rd_mult_q_sq_key_mid_qp = 4.72424015517201;
+    } else if (screen_area <= 320 * 240) {
+      rdc->rd_mult_q_sq_inter_low_qp = 4.506676356706102;
+      rdc->rd_mult_q_sq_inter_mid_qp = 4.489349899621181;
+      rdc->rd_mult_q_sq_inter_high_qp = 4.388244213131458;
+      rdc->rd_mult_q_sq_key_ultralow_qp = 4.217074424696166;
+      rdc->rd_mult_q_sq_key_low_qp = 4.497000582319771;
+      rdc->rd_mult_q_sq_key_mid_qp = 4.2825894884789735;
+    } else if (screen_area <= 640 * 360) {
+      rdc->rd_mult_q_sq_inter_low_qp = 4.730644123689013;
+      rdc->rd_mult_q_sq_inter_mid_qp = 4.314589509578551;
+      rdc->rd_mult_q_sq_inter_high_qp = 4.3702861603380025;
+      rdc->rd_mult_q_sq_key_ultralow_qp = 4.576902541873747;
+      rdc->rd_mult_q_sq_key_low_qp = 6.068652999601526;
+      rdc->rd_mult_q_sq_key_mid_qp = 4.817707474077241;
+    } else if (screen_area <= 854 * 480) {
+      rdc->rd_mult_q_sq_inter_low_qp = 4.811470143416073;
+      rdc->rd_mult_q_sq_inter_mid_qp = 4.621618127750201;
+      rdc->rd_mult_q_sq_inter_high_qp = 3.969083125219539;
+      rdc->rd_mult_q_sq_key_ultralow_qp = 4.9854544277222566;
+      rdc->rd_mult_q_sq_key_low_qp = 5.073157238799473;
+      rdc->rd_mult_q_sq_key_mid_qp = 5.7587672849242635;
+    } else if (screen_area <= 1280 * 720) {
+      rdc->rd_mult_q_sq_inter_low_qp = 5.119381136011107;
+      rdc->rd_mult_q_sq_inter_mid_qp = 4.518613675766538;
+      rdc->rd_mult_q_sq_inter_high_qp = 4.410712348825541;
+      rdc->rd_mult_q_sq_key_ultralow_qp = 3.9468491666607326;
+      rdc->rd_mult_q_sq_key_low_qp = 5.848703119971484;
+      rdc->rd_mult_q_sq_key_mid_qp = 5.368947246228739;
+    } else {
+      rdc->rd_mult_q_sq_inter_low_qp = 6.00569815296199;
+      rdc->rd_mult_q_sq_inter_mid_qp = 3.932565684947023;
+      rdc->rd_mult_q_sq_inter_high_qp = 3.2141187537667797;
+      rdc->rd_mult_q_sq_key_ultralow_qp = 4.399795006320089;
+      rdc->rd_mult_q_sq_key_low_qp = 10.582906599488298;
+      rdc->rd_mult_q_sq_key_mid_qp = 6.274162346360692;
+    }
+  } else {
+    // For now force defaults unless testing
     rdc->rd_mult_q_sq_inter_low_qp = 4.0;
     rdc->rd_mult_q_sq_inter_mid_qp = 4.5;
     rdc->rd_mult_q_sq_inter_high_qp = 3.0;
@@ -215,48 +261,6 @@
     rdc->rd_mult_q_sq_key_ultralow_qp = 4.0;
     rdc->rd_mult_q_sq_key_low_qp = 3.5;
     rdc->rd_mult_q_sq_key_mid_qp = 4.5;
-  } else if (screen_area <= 176 * 144) {
-    rdc->rd_mult_q_sq_inter_low_qp = 4.0718581295922025;
-    rdc->rd_mult_q_sq_inter_mid_qp = 4.031435609256739;
-    rdc->rd_mult_q_sq_inter_high_qp = 4.295745965132044;
-    rdc->rd_mult_q_sq_key_ultralow_qp = 4.290774097327333;
-    rdc->rd_mult_q_sq_key_low_qp = 5.7037775720838155;
-    rdc->rd_mult_q_sq_key_mid_qp = 4.72424015517201;
-  } else if (screen_area <= 320 * 240) {
-    rdc->rd_mult_q_sq_inter_low_qp = 4.506676356706102;
-    rdc->rd_mult_q_sq_inter_mid_qp = 4.489349899621181;
-    rdc->rd_mult_q_sq_inter_high_qp = 4.388244213131458;
-    rdc->rd_mult_q_sq_key_ultralow_qp = 4.217074424696166;
-    rdc->rd_mult_q_sq_key_low_qp = 4.497000582319771;
-    rdc->rd_mult_q_sq_key_mid_qp = 4.2825894884789735;
-  } else if (screen_area <= 640 * 360) {
-    rdc->rd_mult_q_sq_inter_low_qp = 4.730644123689013;
-    rdc->rd_mult_q_sq_inter_mid_qp = 4.314589509578551;
-    rdc->rd_mult_q_sq_inter_high_qp = 4.3702861603380025;
-    rdc->rd_mult_q_sq_key_ultralow_qp = 4.576902541873747;
-    rdc->rd_mult_q_sq_key_low_qp = 6.068652999601526;
-    rdc->rd_mult_q_sq_key_mid_qp = 4.817707474077241;
-  } else if (screen_area <= 854 * 480) {
-    rdc->rd_mult_q_sq_inter_low_qp = 4.811470143416073;
-    rdc->rd_mult_q_sq_inter_mid_qp = 4.621618127750201;
-    rdc->rd_mult_q_sq_inter_high_qp = 3.969083125219539;
-    rdc->rd_mult_q_sq_key_ultralow_qp = 4.9854544277222566;
-    rdc->rd_mult_q_sq_key_low_qp = 5.073157238799473;
-    rdc->rd_mult_q_sq_key_mid_qp = 5.7587672849242635;
-  } else if (screen_area <= 1280 * 720) {
-    rdc->rd_mult_q_sq_inter_low_qp = 5.119381136011107;
-    rdc->rd_mult_q_sq_inter_mid_qp = 4.518613675766538;
-    rdc->rd_mult_q_sq_inter_high_qp = 4.410712348825541;
-    rdc->rd_mult_q_sq_key_ultralow_qp = 3.9468491666607326;
-    rdc->rd_mult_q_sq_key_low_qp = 5.848703119971484;
-    rdc->rd_mult_q_sq_key_mid_qp = 5.368947246228739;
-  } else {
-    rdc->rd_mult_q_sq_inter_low_qp = 6.00569815296199;
-    rdc->rd_mult_q_sq_inter_mid_qp = 3.932565684947023;
-    rdc->rd_mult_q_sq_inter_high_qp = 3.2141187537667797;
-    rdc->rd_mult_q_sq_key_ultralow_qp = 4.399795006320089;
-    rdc->rd_mult_q_sq_key_low_qp = 10.582906599488298;
-    rdc->rd_mult_q_sq_key_mid_qp = 6.274162346360692;
   }
 }