ref: a5c17a689fb9b7f04b62c084acebbad9b666efba
parent: 557fab3678e11e54508bd984dee5673ec57d8da7
author: Jerome Jiang <jianj@google.com>
date: Fri Aug 10 12:14:00 EDT 2018
SVC: extend api to specify temporal id for each spatial layers. BUG=b/112294545 Change-Id: I5be230c8969d69af3ad87068fdf3834ef1af11d9
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -592,9 +592,9 @@
// bypass/flexible mode. The pattern corresponds to the pattern
// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
// non-flexible mode.
-void set_frame_flags_bypass_mode(int tl, int num_spatial_layers,
- int is_key_frame,
- vpx_svc_ref_frame_config_t *ref_frame_config) {
+static void set_frame_flags_bypass_mode_ex0(
+ int tl, int num_spatial_layers, int is_key_frame,
+ vpx_svc_ref_frame_config_t *ref_frame_config) {
int sl;
for (sl = 0; sl < num_spatial_layers; ++sl)
ref_frame_config->update_buffer_slot[sl] = 0;
@@ -672,6 +672,71 @@
}
}
+// Example pattern for 2 spatial layers and 2 temporal layers used in the
+// bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1.
+static void set_frame_flags_bypass_mode_ex1(
+ int tl, int num_spatial_layers, int is_key_frame,
+ vpx_svc_ref_frame_config_t *ref_frame_config) {
+ int sl;
+ for (sl = 0; sl < num_spatial_layers; ++sl)
+ ref_frame_config->update_buffer_slot[sl] = 0;
+
+ if (tl == 0) {
+ if (is_key_frame) {
+ ref_frame_config->lst_fb_idx[1] = 0;
+ ref_frame_config->gld_fb_idx[1] = 1;
+ } else {
+ ref_frame_config->lst_fb_idx[1] = 1;
+ ref_frame_config->gld_fb_idx[1] = 0;
+ }
+ ref_frame_config->alt_fb_idx[1] = 0;
+
+ ref_frame_config->lst_fb_idx[0] = 0;
+ ref_frame_config->gld_fb_idx[0] = 0;
+ ref_frame_config->alt_fb_idx[0] = 0;
+ }
+ if (tl == 1) {
+ ref_frame_config->lst_fb_idx[0] = 0;
+ ref_frame_config->gld_fb_idx[0] = 1;
+ ref_frame_config->alt_fb_idx[0] = 2;
+
+ ref_frame_config->lst_fb_idx[1] = 1;
+ ref_frame_config->gld_fb_idx[1] = 2;
+ ref_frame_config->alt_fb_idx[1] = 3;
+ }
+ // Set the reference and update flags.
+ if (tl == 0) {
+ // Base spatial and base temporal (sl = 0, tl = 0)
+ ref_frame_config->reference_last[0] = 1;
+ ref_frame_config->reference_golden[0] = 0;
+ ref_frame_config->reference_alt_ref[0] = 0;
+ ref_frame_config->update_buffer_slot[0] |=
+ 1 << ref_frame_config->lst_fb_idx[0];
+
+ if (is_key_frame) {
+ ref_frame_config->reference_last[1] = 1;
+ ref_frame_config->reference_golden[1] = 0;
+ ref_frame_config->reference_alt_ref[1] = 0;
+ ref_frame_config->update_buffer_slot[1] |=
+ 1 << ref_frame_config->gld_fb_idx[1];
+ } else {
+ // Non-zero spatial layer.
+ ref_frame_config->reference_last[1] = 1;
+ ref_frame_config->reference_golden[1] = 1;
+ ref_frame_config->reference_alt_ref[1] = 1;
+ ref_frame_config->update_buffer_slot[1] |=
+ 1 << ref_frame_config->lst_fb_idx[1];
+ }
+ }
+ if (tl == 1) {
+ // Top spatial and top temporal (non-reference -- doesn't update any
+ // reference buffers)
+ ref_frame_config->reference_last[1] = 1;
+ ref_frame_config->reference_golden[1] = 0;
+ ref_frame_config->reference_alt_ref[1] = 0;
+ }
+}
+
int main(int argc, const char **argv) {
AppInput app_input;
VpxVideoWriter *writer = NULL;
@@ -704,6 +769,8 @@
memset(&svc_ctx, 0, sizeof(svc_ctx));
memset(&app_input, 0, sizeof(AppInput));
memset(&info, 0, sizeof(VpxVideoInfo));
+ memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
+ memset(&rc, 0, sizeof(struct RateControlStats));
exec_name = argv[0];
parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
@@ -801,6 +868,12 @@
while (!end_of_stream) {
vpx_codec_iter_t iter = NULL;
const vpx_codec_cx_pkt_t *cx_pkt;
+ // Example patterns for bypass/flexible mode:
+ // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3.
+ // Matches the fixed SVC patterns. example_pattern = 1: 2 spatial and 2
+ // temporal layers, where SL0 has only TL0 and SL1 has both TL0 and TL1. This
+ // example uses the extended API.
+ int example_pattern = 1;
if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
// We need one extra vpx_svc_encode call at end of stream to flush
// encoder and get remaining data
@@ -809,26 +882,49 @@
// For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
// and the buffer indices for each spatial layer of the current
- // (super)frame to be encoded. The temporal layer_id for the current frame
- // also needs to be set.
+ // (super)frame to be encoded. The spatial and temporal layer_id for the
+ // current frame also need to be set.
// TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
// mode to "VP9E_LAYERING_MODE_BYPASS".
if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
layer_id.spatial_layer_id = 0;
// Example for 2 temporal layers.
- if (frame_cnt % 2 == 0)
+ if (frame_cnt % 2 == 0) {
layer_id.temporal_layer_id = 0;
- else
+ for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
+ layer_id.temporal_layer_id_per_spatial[i] = 0;
+ } else {
layer_id.temporal_layer_id = 1;
- // Note that we only set the temporal layer_id, since we are calling
- // the encode for the whole superframe. The encoder will internally loop
- // over all the spatial layers for the current superframe.
+ for (i = 0; i < VPX_SS_MAX_LAYERS; i++)
+ layer_id.temporal_layer_id_per_spatial[i] = 1;
+ }
+ if (example_pattern == 1) {
+ // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers.
+ assert(svc_ctx.spatial_layers == 2);
+ assert(svc_ctx.temporal_layers == 2);
+ if (frame_cnt % 2 == 0) {
+ // Spatial layer 0 and 1 are encoded.
+ layer_id.temporal_layer_id_per_spatial[0] = 0;
+ layer_id.temporal_layer_id_per_spatial[1] = 0;
+ layer_id.spatial_layer_id = 0;
+ } else {
+ // Only spatial layer 1 is encoded here.
+ layer_id.temporal_layer_id_per_spatial[1] = 1;
+ layer_id.spatial_layer_id = 1;
+ }
+ }
vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
// TODO(jianj): Fix the parameter passing for "is_key_frame" in
// set_frame_flags_bypass_model() for case of periodic key frames.
- set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
- svc_ctx.spatial_layers, frame_cnt == 0,
- &ref_frame_config);
+ if (example_pattern == 0) {
+ set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id,
+ svc_ctx.spatial_layers, frame_cnt == 0,
+ &ref_frame_config);
+ } else if (example_pattern == 1) {
+ set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id,
+ svc_ctx.spatial_layers, frame_cnt == 0,
+ &ref_frame_config);
+ }
vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
&ref_frame_config);
// Keep track of input frames, to account for frame drops in rate control
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -592,7 +592,7 @@
#if ROI_MAP
vpx_roi_map_t roi;
#endif
- vpx_svc_layer_id_t layer_id = { 0, 0 };
+ vpx_svc_layer_id_t layer_id;
const VpxInterface *encoder = NULL;
FILE *infile = NULL;
struct RateControlMetrics rc;
@@ -610,7 +610,7 @@
double framerate = 30.0;
zero(rc.layer_target_bitrate);
-
+ memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
exec_name = argv[0];
// Check usage and arguments.
if (argc < min_args) {
@@ -856,6 +856,7 @@
layer_id.spatial_layer_id = 0;
layer_id.temporal_layer_id =
cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
+ layer_id.temporal_layer_id_per_spatial[0] = layer_id.temporal_layer_id;
if (strncmp(encoder->name, "vp9", 3) == 0) {
vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
} else if (strncmp(encoder->name, "vp8", 3) == 0) {
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -186,6 +186,8 @@
layer_id.spatial_layer_id = 0;
layer_id.temporal_layer_id = (video->frame() % 2 != 0);
temporal_layer_id_ = layer_id.temporal_layer_id;
+ for (int i = 0; i < number_spatial_layers_; i++)
+ layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_;
encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
number_spatial_layers_, 0, &ref_frame_config);
--- a/test/vp9_datarate_test.cc
+++ b/test/vp9_datarate_test.cc
@@ -144,6 +144,8 @@
frame_flags_ = GetFrameFlags(video->frame(), cfg_.ts_number_layers);
layer_id.temporal_layer_id =
SetLayerId(video->frame(), cfg_.ts_number_layers);
+ layer_id.temporal_layer_id_per_spatial[0] =
+ SetLayerId(video->frame(), cfg_.ts_number_layers);
encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
}
const vpx_rational_t tb = video->timebase();
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -674,6 +674,7 @@
VP9_COMP *const cpi) {
SVC *const svc = &cpi->svc;
int sl = svc->spatial_layer_id = svc->spatial_layer_to_encode;
+ cpi->svc.temporal_layer_id = cpi->svc.temporal_layer_id_per_spatial[sl];
cpi->ext_refresh_frame_flags_pending = 1;
cpi->lst_fb_idx = svc->lst_fb_idx[sl];
cpi->gld_fb_idx = svc->gld_fb_idx[sl];
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -173,6 +173,8 @@
uint8_t fb_idx_base[REF_FRAMES];
int use_set_ref_frame_config;
+
+ int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS];
} SVC;
struct VP9_COMP;
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -1446,9 +1446,16 @@
vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *);
VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi;
SVC *const svc = &cpi->svc;
+ int sl;
svc->spatial_layer_to_encode = data->spatial_layer_id;
+ // TODO(jianj): Deprecated, to be removed.
svc->temporal_layer_id = data->temporal_layer_id;
+ // Allow for setting temporal layer per spatial layer for superframe.
+ for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) {
+ svc->temporal_layer_id_per_spatial[sl] =
+ data->temporal_layer_id_per_spatial[sl];
+ }
// Checks on valid layer_id input.
if (svc->temporal_layer_id < 0 ||
svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) {
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -784,8 +784,10 @@
*
*/
typedef struct vpx_svc_layer_id {
- int spatial_layer_id; /**< Spatial layer id number. */
+ int spatial_layer_id; /**< First spatial layer to start encoding. */
+ // TODO(jianj): Deprecated, to be removed.
int temporal_layer_id; /**< Temporal layer id number. */
+ int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS]; /**< Temp layer id. */
} vpx_svc_layer_id_t;
/*!\brief vp9 svc frame flag parameters.