ref: f84650be2de062eedf35afab3ffab4e42609e796
parent: b247f6e8e442979ea885b5a64bebf44d5a0e4323
parent: 0de415cf6a945457115783807a702a5249f44a9d
author: Jerome Jiang <jianj@google.com>
date: Thu Sep 23 18:10:57 EDT 2021
Merge "vp8 rc: support temporal layers" into main
--- a/test/vp8_ratectrl_rtc_test.cc
+++ b/test/vp8_ratectrl_rtc_test.cc
@@ -61,20 +61,81 @@
SetMode(::libvpx_test::kRealTime);
}
+ // From error_resilience_test.cc
+ int SetFrameFlags(int frame_num, int num_temp_layers) {
+ int frame_flags = 0;
+ if (num_temp_layers == 2) {
+ if (frame_num % 2 == 0) {
+ // Layer 0: predict from L and ARF, update L.
+ frame_flags =
+ VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+ } else {
+ // Layer 1: predict from L, G and ARF, and update G.
+ frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ENTROPY;
+ }
+ } else if (num_temp_layers == 3) {
+ if (frame_num % 4 == 0) {
+ // Layer 0: predict from L, update L.
+ frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+ } else if ((frame_num - 2) % 4 == 0) {
+ // Layer 1: predict from L, G, update G.
+ frame_flags =
+ VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF;
+ } else if ((frame_num - 1) % 2 == 0) {
+ // Layer 2: predict from L, G, ARF; update ARG.
+ frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
+ }
+ }
+ return frame_flags;
+ }
+
+ int SetLayerId(int frame_num, int num_temp_layers) {
+ int layer_id = 0;
+ if (num_temp_layers == 2) {
+ if (frame_num % 2 == 0) {
+ layer_id = 0;
+ } else {
+ layer_id = 1;
+ }
+ } else if (num_temp_layers == 3) {
+ if (frame_num % 4 == 0) {
+ layer_id = 0;
+ } else if ((frame_num - 2) % 4 == 0) {
+ layer_id = 1;
+ } else if ((frame_num - 1) % 2 == 0) {
+ layer_id = 2;
+ }
+ }
+ return layer_id;
+ }
+
virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
::libvpx_test::Encoder *encoder) {
- if (video->frame() == 0) {
- encoder->Control(VP8E_SET_CPUUSED, -6);
- encoder->Control(VP8E_SET_RTC_EXTERNAL_RATECTRL, 1);
- encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 1000);
+ if (rc_cfg_.ts_number_layers > 1) {
+ const int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
+ const int frame_flags =
+ SetFrameFlags(video->frame(), cfg_.ts_number_layers);
+ frame_params_.temporal_layer_id = layer_id;
+ if (video->frame() > 0) {
+ encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
+ encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
+ }
+ } else {
+ if (video->frame() == 0) {
+ encoder->Control(VP8E_SET_CPUUSED, -6);
+ encoder->Control(VP8E_SET_RTC_EXTERNAL_RATECTRL, 1);
+ encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 1000);
+ }
+ if (frame_params_.frame_type == INTER_FRAME) {
+ // Disable golden frame update.
+ frame_flags_ |= VP8_EFLAG_NO_UPD_GF;
+ frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;
+ }
}
frame_params_.frame_type =
video->frame() % key_interval_ == 0 ? KEY_FRAME : INTER_FRAME;
- if (frame_params_.frame_type == INTER_FRAME) {
- // Disable golden frame update.
- frame_flags_ |= VP8_EFLAG_NO_UPD_GF;
- frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;
- }
encoder_exit_ = video->frame() == test_video_.frames;
}
@@ -125,6 +186,38 @@
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}
+ void RunTemporalLayers2TL() {
+ test_video_ = GET_PARAM(2);
+ target_bitrate_ = GET_PARAM(1);
+ if (test_video_.width == 1280 && target_bitrate_ == 200) return;
+ if (test_video_.width == 640 && target_bitrate_ == 1000) return;
+ SetConfigTemporalLayers(2);
+ rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);
+ rc_api_->UpdateRateControl(rc_cfg_);
+
+ ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,
+ test_video_.height, 30, 1, 0,
+ test_video_.frames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ void RunTemporalLayers3TL() {
+ test_video_ = GET_PARAM(2);
+ target_bitrate_ = GET_PARAM(1);
+ if (test_video_.width == 1280 && target_bitrate_ == 200) return;
+ if (test_video_.width == 640 && target_bitrate_ == 1000) return;
+ SetConfigTemporalLayers(3);
+ rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);
+ rc_api_->UpdateRateControl(rc_cfg_);
+
+ ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,
+ test_video_.height, 30, 1, 0,
+ test_video_.frames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
private:
void SetConfig() {
rc_cfg_.width = test_video_.width;
@@ -160,6 +253,72 @@
cfg_.kf_max_dist = key_interval_;
}
+ void SetConfigTemporalLayers(int temporal_layers) {
+ rc_cfg_.width = test_video_.width;
+ rc_cfg_.height = test_video_.height;
+ rc_cfg_.max_quantizer = 60;
+ rc_cfg_.min_quantizer = 2;
+ rc_cfg_.target_bandwidth = target_bitrate_;
+ rc_cfg_.buf_initial_sz = 600;
+ rc_cfg_.buf_optimal_sz = 600;
+ rc_cfg_.buf_sz = target_bitrate_;
+ rc_cfg_.undershoot_pct = 50;
+ rc_cfg_.overshoot_pct = 50;
+ rc_cfg_.max_intra_bitrate_pct = 1000;
+ rc_cfg_.framerate = 30.0;
+ if (temporal_layers == 2) {
+ rc_cfg_.layer_target_bitrate[0] = 60 * target_bitrate_ / 100;
+ rc_cfg_.layer_target_bitrate[1] = target_bitrate_;
+ rc_cfg_.ts_rate_decimator[0] = 2;
+ rc_cfg_.ts_rate_decimator[1] = 1;
+ } else if (temporal_layers == 3) {
+ rc_cfg_.layer_target_bitrate[0] = 40 * target_bitrate_ / 100;
+ rc_cfg_.layer_target_bitrate[1] = 60 * target_bitrate_ / 100;
+ rc_cfg_.layer_target_bitrate[2] = target_bitrate_;
+ rc_cfg_.ts_rate_decimator[0] = 4;
+ rc_cfg_.ts_rate_decimator[1] = 2;
+ rc_cfg_.ts_rate_decimator[2] = 1;
+ }
+
+ rc_cfg_.ts_number_layers = temporal_layers;
+
+ // Encoder settings for ground truth.
+ cfg_.g_w = test_video_.width;
+ cfg_.g_h = test_video_.height;
+ cfg_.rc_undershoot_pct = 50;
+ cfg_.rc_overshoot_pct = 50;
+ cfg_.rc_buf_initial_sz = 600;
+ cfg_.rc_buf_optimal_sz = 600;
+ cfg_.rc_buf_sz = target_bitrate_;
+ cfg_.rc_dropframe_thresh = 0;
+ cfg_.rc_min_quantizer = 2;
+ cfg_.rc_max_quantizer = 60;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.g_error_resilient = 1;
+ cfg_.rc_target_bitrate = target_bitrate_;
+ cfg_.kf_min_dist = key_interval_;
+ cfg_.kf_max_dist = key_interval_;
+ // 2 Temporal layers, no spatial layers, CBR mode.
+ cfg_.ss_number_layers = 1;
+ cfg_.ts_number_layers = temporal_layers;
+ if (temporal_layers == 2) {
+ cfg_.ts_rate_decimator[0] = 2;
+ cfg_.ts_rate_decimator[1] = 1;
+ cfg_.ts_periodicity = 2;
+ cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+ } else if (temporal_layers == 3) {
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.ts_periodicity = 4;
+ cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+ cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+ }
+ }
+
std::unique_ptr<libvpx::VP8RateControlRTC> rc_api_;
libvpx::VP8RateControlRtcConfig rc_cfg_;
int key_interval_;
@@ -172,6 +331,10 @@
TEST_P(Vp8RcInterfaceTest, OneLayer) { RunOneLayer(); }
TEST_P(Vp8RcInterfaceTest, OneLayerPeriodicKey) { RunPeriodicKey(); }
+
+TEST_P(Vp8RcInterfaceTest, TemporalLayers2TL) { RunTemporalLayers2TL(); }
+
+TEST_P(Vp8RcInterfaceTest, TemporalLayers3TL) { RunTemporalLayers3TL(); }
VP8_INSTANTIATE_TEST_SUITE(Vp8RcInterfaceTest,
::testing::Values(200, 400, 1000),
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -183,7 +183,7 @@
extern FILE *vpxlogc;
#endif
-static void save_layer_context(VP8_COMP *cpi) {
+void vp8_save_layer_context(VP8_COMP *cpi) {
LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer];
/* Save layer dependent coding state */
@@ -222,7 +222,7 @@
sizeof(cpi->mb.count_mb_ref_frame_usage));
}
-static void restore_layer_context(VP8_COMP *cpi, const int layer) {
+void vp8_restore_layer_context(VP8_COMP *cpi, const int layer) {
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
/* Restore layer dependent coding state */
@@ -269,9 +269,9 @@
return (int)(llval * llnum / llden);
}
-static void init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,
- const int layer,
- double prev_layer_framerate) {
+void vp8_init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,
+ const int layer,
+ double prev_layer_framerate) {
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];
@@ -336,12 +336,12 @@
// We need this to set the layer context for the new layers below.
if (prev_num_layers == 1) {
cpi->current_layer = 0;
- save_layer_context(cpi);
+ vp8_save_layer_context(cpi);
}
for (i = 0; i < curr_num_layers; ++i) {
LAYER_CONTEXT *lc = &cpi->layer_context[i];
if (i >= prev_num_layers) {
- init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
+ vp8_init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
}
// The initial buffer levels are set based on their starting levels.
// We could set the buffer levels based on the previous state (normalized
@@ -356,7 +356,7 @@
// state (to smooth-out quality dips/rate fluctuation at transition)?
// We need to treat the 1 layer case separately: oxcf.target_bitrate[i]
- // is not set for 1 layer, and the restore_layer_context/save_context()
+ // is not set for 1 layer, and the vp8_restore_layer_context/save_context()
// are not called in the encoding loop, so we need to call it here to
// pass the layer context state to |cpi|.
if (curr_num_layers == 1) {
@@ -364,7 +364,7 @@
lc->buffer_level =
cpi->oxcf.starting_buffer_level_in_ms * lc->target_bandwidth / 1000;
lc->bits_off_target = lc->buffer_level;
- restore_layer_context(cpi, 0);
+ vp8_restore_layer_context(cpi, 0);
}
prev_layer_framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[i];
}
@@ -1274,7 +1274,7 @@
cpi->framerate = framerate;
cpi->output_framerate = framerate;
cpi->per_frame_bandwidth =
- (int)(cpi->oxcf.target_bandwidth / cpi->output_framerate);
+ (int)round(cpi->oxcf.target_bandwidth / cpi->output_framerate);
cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *
cpi->oxcf.two_pass_vbrmin_section / 100);
@@ -1365,7 +1365,7 @@
double prev_layer_framerate = 0;
for (i = 0; i < cpi->oxcf.number_of_layers; ++i) {
- init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
+ vp8_init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
prev_layer_framerate =
cpi->output_framerate / cpi->oxcf.rate_decimator[i];
}
@@ -1382,7 +1382,7 @@
#endif
}
-static void update_layer_contexts(VP8_COMP *cpi) {
+void vp8_update_layer_contexts(VP8_COMP *cpi) {
VP8_CONFIG *oxcf = &cpi->oxcf;
/* Update snapshots of the layer contexts to reflect new parameters */
@@ -1417,8 +1417,8 @@
/* Work out the average size of a frame within this layer */
if (i > 0) {
lc->avg_frame_size_for_layer =
- (int)((oxcf->target_bitrate[i] - oxcf->target_bitrate[i - 1]) *
- 1000 / (lc->framerate - prev_layer_framerate));
+ (int)round((oxcf->target_bitrate[i] - oxcf->target_bitrate[i - 1]) *
+ 1000 / (lc->framerate - prev_layer_framerate));
}
prev_layer_framerate = lc->framerate;
@@ -3261,7 +3261,7 @@
#endif // !CONFIG_REALTIME_ONLY
default:
cpi->per_frame_bandwidth =
- (int)(cpi->target_bandwidth / cpi->output_framerate);
+ (int)round(cpi->target_bandwidth / cpi->output_framerate);
break;
}
@@ -4554,8 +4554,8 @@
for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; ++i) {
LAYER_CONTEXT *lc = &cpi->layer_context[i];
- int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
- cpi->projected_frame_size);
+ int bits_off_for_this_layer = (int)round(
+ lc->target_bandwidth / lc->framerate - cpi->projected_frame_size);
lc->bits_off_target += bits_off_for_this_layer;
@@ -4992,7 +4992,7 @@
if (cpi->oxcf.number_of_layers > 1) {
int layer;
- update_layer_contexts(cpi);
+ vp8_update_layer_contexts(cpi);
/* Restore layer specific context & set frame rate */
if (cpi->temporal_layer_id >= 0) {
@@ -5002,7 +5002,7 @@
cpi->oxcf
.layer_id[cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
}
- restore_layer_context(cpi, layer);
+ vp8_restore_layer_context(cpi, layer);
vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
}
@@ -5133,7 +5133,7 @@
}
/* Save layer specific state */
- if (cpi->oxcf.number_of_layers > 1) save_layer_context(cpi);
+ if (cpi->oxcf.number_of_layers > 1) vp8_save_layer_context(cpi);
vpx_usec_timer_mark(&cmptimer);
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -712,6 +712,12 @@
void vp8_alloc_compressor_data(VP8_COMP *cpi);
int vp8_reverse_trans(int x);
+void vp8_init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,
+ const int layer,
+ double prev_layer_framerate);
+void vp8_update_layer_contexts(VP8_COMP *cpi);
+void vp8_save_layer_context(VP8_COMP *cpi);
+void vp8_restore_layer_context(VP8_COMP *cpi, const int layer);
void vp8_new_framerate(VP8_COMP *cpi, double framerate);
void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
--- a/vp8/vp8_ratectrl_rtc.cc
+++ b/vp8/vp8_ratectrl_rtc.cc
@@ -8,9 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <math.h>
#include <new>
#include "vp8/vp8_ratectrl_rtc.h"
#include "vp8/encoder/ratectrl.h"
+#include "vpx_ports/system_state.h"
namespace libvpx {
/* Quant MOD */
@@ -90,7 +92,7 @@
const VP8RateControlRtcConfig &rc_cfg) {
VP8_COMMON *cm = &cpi_->common;
VP8_CONFIG *oxcf = &cpi_->oxcf;
-
+ vpx_clear_system_state();
cm->Width = rc_cfg.width;
cm->Height = rc_cfg.height;
oxcf->Width = rc_cfg.width;
@@ -102,6 +104,7 @@
cpi_->output_framerate = rc_cfg.framerate;
oxcf->target_bandwidth =
static_cast<unsigned int>(1000 * rc_cfg.target_bandwidth);
+ cpi_->ref_framerate = cpi_->output_framerate;
oxcf->fixed_q = -1;
oxcf->error_resilient_mode = 1;
oxcf->starting_buffer_level_in_ms = rc_cfg.buf_initial_sz;
@@ -110,7 +113,7 @@
oxcf->starting_buffer_level = rc_cfg.buf_initial_sz;
oxcf->optimal_buffer_level = rc_cfg.buf_optimal_sz;
oxcf->maximum_buffer_size = rc_cfg.buf_sz;
- oxcf->number_of_layers = 1;
+ oxcf->number_of_layers = rc_cfg.ts_number_layers;
cpi_->buffered_mode = oxcf->optimal_buffer_level > 0;
oxcf->under_shoot_pct = rc_cfg.undershoot_pct;
oxcf->over_shoot_pct = rc_cfg.overshoot_pct;
@@ -121,6 +124,20 @@
static_cast<int>(cpi_->output_framerate);
}
+ if (oxcf->number_of_layers > 1) {
+ memcpy(oxcf->target_bitrate, rc_cfg.layer_target_bitrate,
+ sizeof(rc_cfg.layer_target_bitrate));
+ memcpy(oxcf->rate_decimator, rc_cfg.ts_rate_decimator,
+ sizeof(rc_cfg.ts_rate_decimator));
+ oxcf->periodicity = 2;
+
+ double prev_layer_framerate = 0;
+ for (unsigned int i = 0; i < oxcf->number_of_layers; ++i) {
+ vp8_init_temporal_layer_context(cpi_, oxcf, i, prev_layer_framerate);
+ prev_layer_framerate = cpi_->output_framerate / oxcf->rate_decimator[i];
+ }
+ }
+
cpi_->total_actual_bits = 0;
cpi_->total_target_vs_actual = 0;
@@ -155,6 +172,15 @@
void VP8RateControlRTC::ComputeQP(const VP8FrameParamsQpRTC &frame_params) {
VP8_COMMON *const cm = &cpi_->common;
+ vpx_clear_system_state();
+ if (cpi_->oxcf.number_of_layers > 1) {
+ cpi_->temporal_layer_id = frame_params.temporal_layer_id;
+ const int layer = frame_params.temporal_layer_id;
+ vp8_update_layer_contexts(cpi_);
+ /* Restore layer specific context & set frame rate */
+ vp8_restore_layer_context(cpi_, layer);
+ vp8_new_framerate(cpi_, cpi_->layer_context[layer].framerate);
+ }
cm->frame_type = frame_params.frame_type;
cm->refresh_golden_frame = (cm->frame_type == KEY_FRAME) ? 1 : 0;
cm->refresh_alt_ref_frame = (cm->frame_type == KEY_FRAME) ? 1 : 0;
@@ -231,9 +257,15 @@
void VP8RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) {
VP8_COMMON *const cm = &cpi_->common;
-
+ vpx_clear_system_state();
cpi_->total_byte_count += encoded_frame_size;
cpi_->projected_frame_size = static_cast<int>(encoded_frame_size << 3);
+ if (cpi_->oxcf.number_of_layers > 1) {
+ for (unsigned int i = cpi_->current_layer + 1;
+ i < cpi_->oxcf.number_of_layers; ++i) {
+ cpi_->layer_context[i].total_byte_count += encoded_frame_size;
+ }
+ }
vp8_update_rate_correction_factors(cpi_, 2);
@@ -283,7 +315,30 @@
cpi_->total_actual_bits += cpi_->projected_frame_size;
cpi_->buffer_level = cpi_->bits_off_target;
+ /* Propagate values to higher temporal layers */
+ if (cpi_->oxcf.number_of_layers > 1) {
+ for (unsigned int i = cpi_->current_layer + 1;
+ i < cpi_->oxcf.number_of_layers; ++i) {
+ LAYER_CONTEXT *lc = &cpi_->layer_context[i];
+ int bits_off_for_this_layer = (int)round(
+ lc->target_bandwidth / lc->framerate - cpi_->projected_frame_size);
+
+ lc->bits_off_target += bits_off_for_this_layer;
+
+ /* Clip buffer level to maximum buffer size for the layer */
+ if (lc->bits_off_target > lc->maximum_buffer_size) {
+ lc->bits_off_target = lc->maximum_buffer_size;
+ }
+
+ lc->total_actual_bits += cpi_->projected_frame_size;
+ lc->total_target_vs_actual += bits_off_for_this_layer;
+ lc->buffer_level = lc->bits_off_target;
+ }
+ }
+
cpi_->common.current_video_frame++;
cpi_->frames_since_key++;
+
+ if (cpi_->oxcf.number_of_layers > 1) vp8_save_layer_context(cpi_);
}
} // namespace libvpx
--- a/vp8/vp8_ratectrl_rtc.h
+++ b/vp8/vp8_ratectrl_rtc.h
@@ -29,6 +29,7 @@
struct VP8FrameParamsQpRTC {
FRAME_TYPE frame_type;
+ int temporal_layer_id;
};
class VP8RateControlRTC {