ref: 28345f9730e476eebd23c41c1abd71fc45b74d9a
parent: 932f8fa04dc15f4adf16df37402556e8c4dc72e7
author: Jerome Jiang <jianj@google.com>
date: Thu Nov 29 10:53:41 EST 2018
vp9: force refresh of long term ref when denoiser reset. This will allocate extra frame buffer if long term temporal reference is used and denoiser is enabled on non-key frame. Add test. Change-Id: I0e8d1fdb9a2d697a8eed7fe6206bcb362e69f1c8
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -61,6 +61,8 @@
force_intra_only_frame_ = 0;
superframe_has_intra_only_ = 0;
use_post_encode_drop_ = 0;
+ denoiser_off_on_ = false;
+ denoiser_enable_layers_ = false;
}
virtual void BeginPassHook(unsigned int /*pass*/) {}
@@ -181,6 +183,46 @@
}
}
+ if (denoiser_off_on_) {
+ encoder->Control(VP9E_SET_AQ_MODE, 3);
+ // Set inter_layer_pred to INTER_LAYER_PRED_OFF_NONKEY (K-SVC).
+ encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, 2);
+ if (!denoiser_enable_layers_) {
+ if (video->frame() == 0)
+ encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 0);
+ else if (video->frame() == 100)
+ encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 1);
+ } else {
+ // Cumulative bitrates for the top spatial layer, for its
+ // 3 temporal layers, are saved in bitrate_sl3_ and restored later.
+ if (video->frame() == 0) {
+ encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 0);
+ // Change layer bitrates to set top spatial layer to 0.
+ // This is for 3 spatial 3 temporal layers.
+ // This will trigger skip encoding/dropping of top spatial layer.
+ cfg_.rc_target_bitrate -= cfg_.layer_target_bitrate[8];
+ for (int i = 0; i < 3; i++)
+ bitrate_sl3_[i] = cfg_.layer_target_bitrate[i + 6];
+ cfg_.layer_target_bitrate[6] = 0;
+ cfg_.layer_target_bitrate[7] = 0;
+ cfg_.layer_target_bitrate[8] = 0;
+ encoder->Config(&cfg_);
+ } else if (video->frame() == 100) {
+ // Restore the saved non-zero layer bitrates on top spatial layer.
+ // This will trigger enabling of top spatial layer
+ // on key frame (period = 100).
+ for (int i = 0; i < 3; i++)
+ cfg_.layer_target_bitrate[i + 6] = bitrate_sl3_[i];
+ cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[8];
+ encoder->Config(&cfg_);
+ } else if (video->frame() == 120) {
+ // Enable denoiser after the key frame on which the top spatial layer
+ // was enabled (period is 100).
+ encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 1);
+ }
+ }
+ }
+
if (update_pattern_ && video->frame() >= 100) {
vpx_svc_layer_id_t layer_id;
if (video->frame() == 100) {
@@ -488,6 +530,11 @@
int force_intra_only_frame_;
int superframe_has_intra_only_;
int use_post_encode_drop_;
+ int bitrate_sl3_[3];
+ // Denoiser switched on the fly.
+ bool denoiser_off_on_;
+ // Top layer enabled on the fly.
+ bool denoiser_enable_layers_;
};
// Params: speed setting.
@@ -637,6 +684,112 @@
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
1.15);
+#if CONFIG_VP9_DECODER
+ // The non-reference frames are expected to be mismatched frames as the
+ // encoder will avoid loopfilter on these frames.
+ EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC with 3 spatial and 3 temporal
+// layers, for inter_layer_pred=OffKey (K-SVC) and on the fly switching
+// of denoiser from off to on (on at frame = 100). Key frame period is set to
+// 1000 so the denoiser is enabled on a non-key frame.
+TEST_P(DatarateOnePassCbrSvcSingleBR,
+ OnePassCbrSvc3SL3TL_DenoiserOffOnFixedLayers) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 3;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.temporal_layering_mode = 3;
+ cfg_.g_error_resilient = 1;
+ cfg_.g_threads = 1;
+ svc_params_.scaling_factor_num[0] = 72;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 144;
+ svc_params_.scaling_factor_den[1] = 288;
+ svc_params_.scaling_factor_num[2] = 288;
+ svc_params_.scaling_factor_den[2] = 288;
+ cfg_.rc_dropframe_thresh = 30;
+ cfg_.kf_max_dist = 1000;
+ number_spatial_layers_ = cfg_.ss_number_layers;
+ number_temporal_layers_ = cfg_.ts_number_layers;
+ ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", 1280,
+ 720, 30, 1, 0, 300);
+ top_sl_width_ = 1280;
+ top_sl_height_ = 720;
+ cfg_.rc_target_bitrate = 1000;
+ ResetModel();
+ denoiser_off_on_ = true;
+ denoiser_enable_layers_ = false;
+ AssignLayerBitrates();
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Don't check rate targeting on the two top spatial layers since they will
+ // be skipped for part of the sequence.
+ CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_,
+ 0.78, 1.15);
+#if CONFIG_VP9_DECODER
+ // The non-reference frames are expected to be mismatched frames as the
+ // encoder will avoid loopfilter on these frames.
+ EXPECT_EQ(num_nonref_frames_, GetMismatchFrames());
+#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC with 3 spatial and 3 temporal
+// layers, for inter_layer_pred=OffKey (K-SVC) and on the fly switching
+// of denoiser from off to on, for dynamic layers. Start at 2 spatial layers
+// and enable 3rd spatial layer at frame = 100. Use periodic key frame with
+// period 100 so enabling of spatial layer occurs at key frame. Enable denoiser
+// at frame > 100, after the key frame sync.
+TEST_P(DatarateOnePassCbrSvcSingleBR,
+ OnePassCbrSvc3SL3TL_DenoiserOffOnEnableLayers) {
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.ss_number_layers = 3;
+ cfg_.ts_number_layers = 3;
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.temporal_layering_mode = 3;
+ cfg_.g_error_resilient = 1;
+ cfg_.g_threads = 1;
+ svc_params_.scaling_factor_num[0] = 72;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 144;
+ svc_params_.scaling_factor_den[1] = 288;
+ svc_params_.scaling_factor_num[2] = 288;
+ svc_params_.scaling_factor_den[2] = 288;
+ cfg_.rc_dropframe_thresh = 0;
+ cfg_.kf_max_dist = 100;
+ number_spatial_layers_ = cfg_.ss_number_layers;
+ number_temporal_layers_ = cfg_.ts_number_layers;
+ ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", 1280,
+ 720, 30, 1, 0, 300);
+ top_sl_width_ = 1280;
+ top_sl_height_ = 720;
+ cfg_.rc_target_bitrate = 1000;
+ ResetModel();
+ denoiser_off_on_ = true;
+ denoiser_enable_layers_ = true;
+ AssignLayerBitrates();
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ // Don't check rate targeting on the two top spatial layers since they will
+ // be skipped for part of the sequence.
+ CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_,
+ 0.78, 1.15);
#if CONFIG_VP9_DECODER
// The non-reference frames are expected to be mismatched frames as the
// encoder will avoid loopfilter on these frames.
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -3,6 +3,7 @@
# Encoder test source
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += desktop_office1.1280_720-020.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -863,3 +863,4 @@
eb198c25f861c3fe2cbd310de11eb96843019345 *invalid-crbug-1558.ivf.res
c62b005a9fd32c36a1b3f67de6840330f9915e34 *invalid-crbug-1562.ivf
f0cd8389948ad16085714d96567612136f6a46c5 *invalid-crbug-1562.ivf.res
+bac455906360b45338a16dd626ac5f19bc36a307 *desktop_office1.1280_720-020.yuv
--- a/vp9/encoder/vp9_denoiser.c
+++ b/vp9/encoder/vp9_denoiser.c
@@ -692,6 +692,7 @@
denoiser->denoising_level = kDenLow;
denoiser->prev_denoising_level = kDenLow;
denoiser->reset = 0;
+ denoiser->current_denoiser_frame = 0;
return 0;
}
@@ -716,13 +717,29 @@
vpx_free_frame_buffer(&denoiser->last_source);
}
-void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level) {
+static void force_refresh_longterm_ref(VP9_COMP *const cpi) {
+ SVC *const svc = &cpi->svc;
+ // If the long term temporal reference is used on the current layer, force a
+ // refresh of that slot so the denoiser buffer for it stays in sync.
+ if (svc->use_gf_temporal_ref_current_layer) {
+ int index = svc->spatial_layer_id;
+ if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
+ assert(index >= 0);
+ cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
+ cpi->refresh_alt_ref_frame = 1;
+ }
+}
+
+// Sets the denoising level from |noise_level|. If the level moves above
+// kDenLowLow while the previous level was kDenLowLow, flag a denoiser reset
+// and force a refresh of the long term reference (if it is in use);
+// otherwise clear the reset flag.
+void vp9_denoiser_set_noise_level(VP9_COMP *const cpi, int noise_level) {
+ VP9_DENOISER *const denoiser = &cpi->denoiser;
denoiser->denoising_level = noise_level;
if (denoiser->denoising_level > kDenLowLow &&
- denoiser->prev_denoising_level == kDenLowLow)
+ denoiser->prev_denoising_level == kDenLowLow) {
denoiser->reset = 1;
- else
+ force_refresh_longterm_ref(cpi);
+ } else {
denoiser->reset = 0;
+ }
denoiser->prev_denoising_level = denoiser->denoising_level;
}
@@ -754,14 +771,24 @@
return threshold;
}
+// Flags a denoiser reset and forces a refresh of the long term reference (if
+// it is in use) when vp9_denoise_svc_non_key() holds for the current layer
+// and the denoiser frame counter is still 0.
+void vp9_denoiser_reset_on_first_frame(VP9_COMP *const cpi) {
+ if (vp9_denoise_svc_non_key(cpi) &&
+ cpi->denoiser.current_denoiser_frame == 0) {
+ cpi->denoiser.reset = 1;
+ force_refresh_longterm_ref(cpi);
+ }
+}
+
void vp9_denoiser_update_ref_frame(VP9_COMP *const cpi) {
VP9_COMMON *const cm = &cpi->common;
SVC *const svc = &cpi->svc;
+
if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) &&
cpi->denoiser.denoising_level > kDenLowLow) {
int svc_refresh_denoiser_buffers = 0;
int denoise_svc_second_layer = 0;
FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type;
+ cpi->denoiser.current_denoiser_frame++;
if (cpi->use_svc) {
const int svc_buf_shift =
svc->number_spatial_layers - svc->spatial_layer_id == 2
--- a/vp9/encoder/vp9_denoiser.h
+++ b/vp9/encoder/vp9_denoiser.h
@@ -50,6 +50,7 @@
int reset;
int num_ref_frames;
int num_layers;
+ unsigned int current_denoiser_frame;
VP9_DENOISER_LEVEL denoising_level;
VP9_DENOISER_LEVEL prev_denoising_level;
} VP9_DENOISER;
@@ -111,7 +112,9 @@
void vp9_denoiser_free(VP9_DENOISER *denoiser);
-void vp9_denoiser_set_noise_level(VP9_DENOISER *denoiser, int noise_level);
+void vp9_denoiser_set_noise_level(struct VP9_COMP *const cpi, int noise_level);
+
+void vp9_denoiser_reset_on_first_frame(struct VP9_COMP *const cpi);
int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level,
int content_state, int temporal_layer_id);
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -3815,6 +3815,10 @@
cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
}
+#if CONFIG_VP9_TEMPORAL_DENOISING
+ if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
+ vp9_denoiser_reset_on_first_frame(cpi);
+#endif
vp9_update_noise_estimate(cpi);
// Scene detection is always used for VBR mode or screen-content case.
--- a/vp9/encoder/vp9_noise_estimate.c
+++ b/vp9/encoder/vp9_noise_estimate.c
@@ -159,7 +159,7 @@
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) &&
cpi->svc.current_superframe > 1) {
- vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+ vp9_denoiser_set_noise_level(cpi, ne->level);
copy_frame(&cpi->denoiser.last_source, cpi->Source);
}
#endif
@@ -269,7 +269,7 @@
ne->level = vp9_noise_estimate_extract_level(ne);
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi))
- vp9_denoiser_set_noise_level(&cpi->denoiser, ne->level);
+ vp9_denoiser_set_noise_level(cpi, ne->level);
#endif
}
}
--- a/vp9/encoder/vp9_pickmode.c
+++ b/vp9/encoder/vp9_pickmode.c
@@ -1815,13 +1815,7 @@
#if CONFIG_VP9_TEMPORAL_DENOISING
if (cpi->oxcf.noise_sensitivity > 0) {
- if (cpi->use_svc) {
- int layer =
- LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
- svc->number_temporal_layers);
- LAYER_CONTEXT *lc = &svc->layer_context[layer];
- denoise_svc_pickmode = denoise_svc(cpi) && !lc->is_key_frame;
- }
+ if (cpi->use_svc) denoise_svc_pickmode = vp9_denoise_svc_non_key(cpi);
if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
vp9_denoiser_reset_frame_stats(ctx);
}
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -1106,6 +1106,16 @@
}
}
+#if CONFIG_VP9_TEMPORAL_DENOISING
+// Returns nonzero if denoise_svc(cpi) is true and the layer context of the
+// current spatial/temporal layer is not marked as a key frame.
+int vp9_denoise_svc_non_key(VP9_COMP *const cpi) {
+ int layer =
+ LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id,
+ cpi->svc.number_temporal_layers);
+ LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer];
+ return denoise_svc(cpi) && !lc->is_key_frame;
+}
+#endif
+
void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
// Only for superframes whose base is not key, as those are
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -235,6 +235,10 @@
// Start a frame and initialize svc parameters
int vp9_svc_start_frame(struct VP9_COMP *const cpi);
+#if CONFIG_VP9_TEMPORAL_DENOISING
+int vp9_denoise_svc_non_key(struct VP9_COMP *const cpi);
+#endif
+
void vp9_copy_flags_ref_update_idx(struct VP9_COMP *const cpi);
int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi);