ref: bd26b8aa712af1defca7c307ee6c5bd0ceb01fb0
parent: 368200a80758234b96bf61776bc79133d21f4317
parent: e0d406586a137f189dfbe51289e00c732da8ec62
author: Jingning Han <jingning@google.com>
date: Tue Dec 18 01:34:49 EST 2018
Merge "Relocate tpl buffer allocation"
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -5734,7 +5734,7 @@
int64_t mc_dep_cost_base = 0;
int row, col;
- for (row = 0; row < cm->mi_rows; ++row) {
+ for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) {
for (col = 0; col < cm->mi_cols; ++col) {
TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
intra_cost_base += this_stats->intra_cost;
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -2110,7 +2110,7 @@
VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf,
BufferPool *const pool) {
- unsigned int i, frame;
+ unsigned int i;
VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP));
VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL;
@@ -2361,52 +2361,11 @@
vp9_set_speed_features_framesize_independent(cpi);
vp9_set_speed_features_framesize_dependent(cpi);
- if (cpi->sf.enable_tpl_model) {
- const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
- const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
#if CONFIG_NON_GREEDY_MV
- CHECK_MEM_ERROR(
- cm, cpi->feature_score_loc_arr,
- vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
- CHECK_MEM_ERROR(
- cm, cpi->feature_score_loc_sort,
- vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
- CHECK_MEM_ERROR(
- cm, cpi->feature_score_loc_heap,
- vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
-#endif
- // TODO(jingning): Reduce the actual memory use for tpl model build up.
- for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
-#if CONFIG_NON_GREEDY_MV
- int sqr_bsize;
- int rf_idx;
- for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
- for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
- CHECK_MEM_ERROR(
- cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
- vpx_calloc(mi_rows * mi_cols,
- sizeof(*cpi->tpl_stats[frame]
- .pyramid_mv_arr[rf_idx][sqr_bsize])));
- }
- }
-#endif
- CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
- vpx_calloc(mi_rows * mi_cols,
- sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
- cpi->tpl_stats[frame].is_valid = 0;
- cpi->tpl_stats[frame].width = mi_cols;
- cpi->tpl_stats[frame].height = mi_rows;
- cpi->tpl_stats[frame].stride = mi_cols;
- cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
- cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
- }
+ cpi->feature_score_loc_alloc = 0;
+#endif // CONFIG_NON_GREEDY_MV
+ for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL;
- for (frame = 0; frame < REF_FRAMES; ++frame) {
- cpi->enc_frame_buf[frame].mem_valid = 0;
- cpi->enc_frame_buf[frame].released = 1;
- }
- }
-
// Allocate memory to store variances for a frame.
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff)));
cpi->source_var_thresh = 0;
@@ -6443,6 +6402,71 @@
#endif // DUMP_TPL_STATS
#endif // CONFIG_NON_GREEDY_MV
+// Makes sure the TPL model buffers hanging off |cpi| are allocated and large
+// enough for the current frame dimensions.
+//
+// For every slot in cpi->tpl_stats[] the function keeps the existing buffer
+// when it is non-NULL and at least as large as the aligned frame size;
+// otherwise the old buffer is released and a zeroed one is allocated. The
+// per-frame geometry fields are updated only for slots that get reallocated.
+// Finally, every entry of cpi->enc_frame_buf[] is marked as not valid and
+// released.
+//
+// Allocation failures are reported through CHECK_MEM_ERROR on |cm|.
+static void init_tpl_buffer(VP9_COMP *cpi) {
+  VP9_COMMON *cm = &cpi->common;
+  int frame;
+
+  // Both dimensions go through the same alignment helper.
+  const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
+  const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows);
+#if CONFIG_NON_GREEDY_MV
+  int sqr_bsize;
+  int rf_idx;
+
+  // TODO(angiebird): This probably needs further modifications to support
+  // frame scaling later on.
+  // The feature score arrays are allocated only once, with the dimensions in
+  // effect on the first call; feature_score_loc_alloc records that this
+  // happened.
+  if (cpi->feature_score_loc_alloc == 0) {
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_arr,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_arr)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_sort,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_sort)));
+    CHECK_MEM_ERROR(
+        cm, cpi->feature_score_loc_heap,
+        vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->feature_score_loc_heap)));
+
+    cpi->feature_score_loc_alloc = 1;
+  }
+#endif
+
+  // TODO(jingning): Reduce the actual memory use for tpl model build up.
+  for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) {
+    // An existing buffer that already covers the aligned frame is reused.
+    if (cpi->tpl_stats[frame].width >= mi_cols &&
+        cpi->tpl_stats[frame].height >= mi_rows &&
+        cpi->tpl_stats[frame].tpl_stats_ptr)
+      continue;
+
+#if CONFIG_NON_GREEDY_MV
+    for (rf_idx = 0; rf_idx < 3; ++rf_idx) {
+      for (sqr_bsize = 0; sqr_bsize < SQUARE_BLOCK_SIZES; ++sqr_bsize) {
+        // Release the previous per-entry buffer before replacing it.
+        // pyramid_mv_arr itself is a table of pointers and must not be
+        // passed to vpx_free(); only its elements are heap allocations.
+        // NOTE(review): relies on the entries being NULL before the first
+        // allocation (e.g. a zero-initialized VP9_COMP) - confirm.
+        vpx_free(cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize]);
+        CHECK_MEM_ERROR(
+            cm, cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize],
+            vpx_calloc(
+                mi_rows * mi_cols,
+                sizeof(
+                    *cpi->tpl_stats[frame].pyramid_mv_arr[rf_idx][sqr_bsize])));
+      }
+    }
+#endif
+    vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr);
+    CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr,
+                    vpx_calloc(mi_rows * mi_cols,
+                               sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr)));
+    // Freshly allocated stats carry no data yet.
+    cpi->tpl_stats[frame].is_valid = 0;
+    cpi->tpl_stats[frame].width = mi_cols;
+    cpi->tpl_stats[frame].height = mi_rows;
+    cpi->tpl_stats[frame].stride = mi_cols;
+    cpi->tpl_stats[frame].mi_rows = cm->mi_rows;
+    cpi->tpl_stats[frame].mi_cols = cm->mi_cols;
+  }
+
+  for (frame = 0; frame < REF_FRAMES; ++frame) {
+    cpi->enc_frame_buf[frame].mem_valid = 0;
+    cpi->enc_frame_buf[frame].released = 1;
+  }
+}
+
static void setup_tpl_stats(VP9_COMP *cpi) {
GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE];
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
@@ -6680,6 +6704,7 @@
if (gf_group_index == 1 &&
cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE &&
cpi->sf.enable_tpl_model) {
+ init_tpl_buffer(cpi);
vp9_estimate_qp_gop(cpi);
setup_tpl_stats(cpi);
}
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -569,6 +569,7 @@
YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES];
EncFrameBuf enc_frame_buf[REF_FRAMES];
#if CONFIG_NON_GREEDY_MV
+ int feature_score_loc_alloc;
FEATURE_SCORE_LOC *feature_score_loc_arr;
FEATURE_SCORE_LOC **feature_score_loc_sort;
FEATURE_SCORE_LOC **feature_score_loc_heap;