ref: bca4564683a35c795973ce2a318cc4f1391f9bbb
parent: 30ef50b522c7b568921ca3e37bcc71ca7cd52972
author: Yunqing Wang <yunqingwang@google.com>
date: Wed Apr 19 13:00:08 EDT 2017
Make allow_exhaustive_searches feature no longer adaptive A previous patch turned on the allow_exhaustive_searches feature only for FC_GRAPHICS_ANIMATION content. This patch further modified the feature by removing the exhaustive search limit, and made it no longer adaptive. As a result, the 2 counters that recorded the number of motion searches were removed, which helped achieve determinism in row-based multi-threaded encoding. Tests showed that this patch didn't make the encoder much slower. Used exhaustive_searches_thresh for this speed feature, and removed allow_exhaustive_searches. Also, refactored the speed feature code to follow the general speed feature setting style. Change-Id: Ib96b182c4c8dfff4c1ab91d2497cc42bb9e5a4aa
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -93,11 +93,6 @@
int rddiv;
int rdmult;
int mb_energy;
- int *m_search_count_ptr;
- int *ex_search_count_ptr;
-#if CONFIG_MULTITHREAD
- pthread_mutex_t *search_count_mutex;
-#endif
// These are set to their default values at the beginning, and then adjusted
// further in the encoding process.
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -4341,7 +4341,6 @@
}
}
#if CONFIG_MULTITHREAD
- tile_data->search_count_mutex = NULL;
tile_data->enc_row_mt_mutex = NULL;
tile_data->row_base_thresh_freq_fact = NULL;
#endif
@@ -4361,10 +4360,6 @@
cpi->tplist[tile_row][tile_col] = tplist + tplist_count;
tplist = cpi->tplist[tile_row][tile_col];
tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2);
-
- // Set up pointers to per thread motion search counters.
- this_tile->m_search_count = 0; // Count of motion search hits.
- this_tile->ex_search_count = 0; // Exhaustive mesh search hits.
}
}
}
@@ -4408,13 +4403,6 @@
const int mi_row_start = tile_info->mi_row_start;
const int mi_row_end = tile_info->mi_row_end;
int mi_row;
-
- // Set up pointers to per thread motion search counters.
- td->mb.m_search_count_ptr = &this_tile->m_search_count;
- td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
-#if CONFIG_MULTITHREAD
- td->mb.search_count_mutex = this_tile->search_count_mutex;
-#endif
for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE)
vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -281,8 +281,6 @@
TileInfo tile_info;
int thresh_freq_fact[BLOCK_SIZES][MAX_MODES];
int mode_map[BLOCK_SIZES][MAX_MODES];
- int m_search_count;
- int ex_search_count;
FIRSTPASS_DATA fp_data;
VP9RowMTSync row_mt_sync;
@@ -289,7 +287,6 @@
// Used for adaptive_rd_thresh with row multithreading
int *row_base_thresh_freq_fact;
#if CONFIG_MULTITHREAD
- pthread_mutex_t *search_count_mutex;
pthread_mutex_t *enc_row_mt_mutex;
#endif
} TileDataEnc;
--- a/vp9/encoder/vp9_ethread.c
+++ b/vp9/encoder/vp9_ethread.c
@@ -552,7 +552,6 @@
const VP9_COMMON *const cm = &cpi->common;
const int tile_cols = 1 << cm->log2_tile_cols;
int tile_row, tile_col;
- TileDataEnc *this_tile;
int end_of_frame;
int thread_id = thread_data->thread_id;
int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id];
@@ -573,13 +572,6 @@
tile_col = proc_job->tile_col_id;
tile_row = proc_job->tile_row_id;
mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE;
-
- this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
- thread_data->td->mb.m_search_count_ptr = &this_tile->m_search_count;
- thread_data->td->mb.ex_search_count_ptr = &this_tile->ex_search_count;
-#if CONFIG_MULTITHREAD
- thread_data->td->mb.search_count_mutex = this_tile->search_count_mutex;
-#endif
vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row);
}
--- a/vp9/encoder/vp9_mcomp.c
+++ b/vp9/encoder/vp9_mcomp.c
@@ -1998,18 +1998,6 @@
int range = sf->mesh_patterns[0].range;
int baseline_interval_divisor;
-#if CONFIG_MULTITHREAD
- if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
-#endif
-
- // Keep track of number of exhaustive calls (this frame in this thread).
- ++(*x->ex_search_count_ptr);
-
-#if CONFIG_MULTITHREAD
- if (NULL != x->search_count_mutex)
- pthread_mutex_unlock(x->search_count_mutex);
-#endif
-
// Trap illegal values for interval and range for this function.
if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
(interval > range))
@@ -2367,32 +2355,6 @@
return best_sad;
}
-#define MIN_EX_SEARCH_LIMIT 128
-static int is_exhaustive_allowed(VP9_COMP *cpi, MACROBLOCK *x) {
- const SPEED_FEATURES *const sf = &cpi->sf;
- int is_exhaustive_allowed;
- int max_ex;
-
-#if CONFIG_MULTITHREAD
- if (NULL != x->search_count_mutex) pthread_mutex_lock(x->search_count_mutex);
-#endif
-
- max_ex = VPXMAX(MIN_EX_SEARCH_LIMIT,
- (*x->m_search_count_ptr * sf->max_exaustive_pct) / 100);
-
- is_exhaustive_allowed = sf->allow_exhaustive_searches &&
- (sf->exhaustive_searches_thresh < INT_MAX) &&
- (*x->ex_search_count_ptr <= max_ex) &&
- !cpi->rc.is_src_frame_alt_ref;
-
-#if CONFIG_MULTITHREAD
- if (NULL != x->search_count_mutex)
- pthread_mutex_unlock(x->search_count_mutex);
-#endif
-
- return is_exhaustive_allowed;
-}
-
int vp9_full_pixel_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize,
MV *mvp_full, int step_param, int search_method,
int error_per_bit, int *cost_list, const MV *ref_mv,
@@ -2435,21 +2397,9 @@
MAX_MVSEARCH_STEPS - 1 - step_param, 1,
cost_list, fn_ptr, ref_mv, tmp_mv);
-#if CONFIG_MULTITHREAD
- if (NULL != x->search_count_mutex)
- pthread_mutex_lock(x->search_count_mutex);
-#endif
-
- // Keep track of number of searches (this frame in this thread).
- ++(*x->m_search_count_ptr);
-
-#if CONFIG_MULTITHREAD
- if (NULL != x->search_count_mutex)
- pthread_mutex_unlock(x->search_count_mutex);
-#endif
-
// Should we allow a follow on exhaustive search?
- if (is_exhaustive_allowed(cpi, x)) {
+ if ((sf->exhaustive_searches_thresh < INT_MAX) &&
+ !cpi->rc.is_src_frame_alt_ref) {
int64_t exhuastive_thr = sf->exhaustive_searches_thresh;
exhuastive_thr >>=
8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]);
--- a/vp9/encoder/vp9_multi_thread.c
+++ b/vp9/encoder/vp9_multi_thread.c
@@ -116,11 +116,6 @@
for (tile_col = 0; tile_col < tile_cols; tile_col++) {
TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
- CHECK_MEM_ERROR(cm, this_tile->search_count_mutex,
- vpx_malloc(sizeof(*this_tile->search_count_mutex)));
-
- pthread_mutex_init(this_tile->search_count_mutex, NULL);
-
CHECK_MEM_ERROR(cm, this_tile->enc_row_mt_mutex,
vpx_malloc(sizeof(*this_tile->enc_row_mt_mutex)));
@@ -170,9 +165,6 @@
this_tile->row_base_thresh_freq_fact = NULL;
}
}
- pthread_mutex_destroy(this_tile->search_count_mutex);
- vpx_free(this_tile->search_count_mutex);
- this_tile->search_count_mutex = NULL;
pthread_mutex_destroy(this_tile->enc_row_mt_mutex);
vpx_free(this_tile->enc_row_mt_mutex);
this_tile->enc_row_mt_mutex = NULL;
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -20,19 +20,14 @@
{ 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 }
};
-#define MAX_MESH_SPEED 5 // Max speed setting for mesh motion method
+// Define 3 mesh density levels to control the number of searches.
+#define MESH_DENSITY_LEVELS 3
static MESH_PATTERN
- good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
+ good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = {
{ { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
- { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
{ { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
{ { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
- { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
- { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
};
-static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
- 50, 25, 15, 5, 1, 1
-};
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
@@ -163,6 +158,7 @@
SPEED_FEATURES *sf,
int speed) {
const int boosted = frame_is_boosted(cpi);
+ int i;
sf->tx_size_search_breakout = 1;
sf->adaptive_rd_thresh = 1;
@@ -171,6 +167,19 @@
sf->use_square_partition_only = !frame_is_boosted(cpi);
sf->use_square_only_threshold = BLOCK_16X16;
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+ sf->exhaustive_searches_thresh = (1 << 22);
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ int mesh_density_level = 0;
+ sf->mesh_patterns[i].range =
+ good_quality_mesh_patterns[mesh_density_level][i].range;
+ sf->mesh_patterns[i].interval =
+ good_quality_mesh_patterns[mesh_density_level][i].interval;
+ }
+ } else {
+ sf->exhaustive_searches_thresh = INT_MAX;
+ }
+
if (speed >= 1) {
if (cpi->oxcf.pass == 2) {
TWO_PASS *const twopass = &cpi->twopass;
@@ -208,6 +217,10 @@
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 30;
+
+ sf->exhaustive_searches_thresh =
+ (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23)
+ : INT_MAX;
}
if (speed >= 2) {
@@ -229,6 +242,16 @@
sf->allow_partition_search_skip = 1;
sf->recode_tolerance_low = 15;
sf->recode_tolerance_high = 45;
+
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ int mesh_density_level = 1;
+ sf->mesh_patterns[i].range =
+ good_quality_mesh_patterns[mesh_density_level][i].range;
+ sf->mesh_patterns[i].interval =
+ good_quality_mesh_patterns[mesh_density_level][i].interval;
+ }
+ }
}
if (speed >= 3) {
@@ -247,6 +270,16 @@
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
+
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ int mesh_density_level = 2;
+ sf->mesh_patterns[i].range =
+ good_quality_mesh_patterns[mesh_density_level][i].range;
+ sf->mesh_patterns[i].interval =
+ good_quality_mesh_patterns[mesh_density_level][i].interval;
+ }
+ }
}
if (speed >= 4) {
@@ -325,7 +358,6 @@
sf->adaptive_rd_thresh = 1;
sf->adaptive_rd_thresh_row_mt = 0;
sf->use_fast_coef_costing = 1;
- sf->allow_exhaustive_searches = 0;
sf->exhaustive_searches_thresh = INT_MAX;
sf->allow_acl = 0;
sf->copy_partition_flag = 0;
@@ -609,7 +641,6 @@
// and multiple threads match
if (cpi->oxcf.row_mt_bit_exact) {
sf->adaptive_rd_thresh = 0;
- sf->allow_exhaustive_searches = 0;
sf->adaptive_pred_interp_filter = 0;
}
@@ -711,6 +742,16 @@
sf->adaptive_rd_thresh = 1;
sf->tx_size_search_breakout = 1;
+ sf->exhaustive_searches_thresh =
+ (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
+ : INT_MAX;
+ if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
+ for (i = 0; i < MAX_MESH_STEP; ++i) {
+ sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
+ sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
+ }
+ }
+
if (oxcf->mode == REALTIME)
set_rt_speed_feature_framesize_independent(cpi, sf, oxcf->speed,
oxcf->content);
@@ -720,32 +761,6 @@
cpi->full_search_sad = vp9_full_search_sad;
cpi->diamond_search_sad = vp9_diamond_search_sad;
- if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
- sf->allow_exhaustive_searches = 1;
- if (oxcf->mode == BEST) {
- sf->exhaustive_searches_thresh = (1 << 20);
- sf->max_exaustive_pct = 100;
- for (i = 0; i < MAX_MESH_STEP; ++i) {
- sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
- sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
- }
- } else {
- int speed = (oxcf->speed > MAX_MESH_SPEED) ? MAX_MESH_SPEED : oxcf->speed;
- sf->exhaustive_searches_thresh = (1 << 22);
- sf->max_exaustive_pct = good_quality_max_mesh_pct[speed];
- if (speed > 0)
- sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
-
- for (i = 0; i < MAX_MESH_STEP; ++i) {
- sf->mesh_patterns[i].range = good_quality_mesh_patterns[speed][i].range;
- sf->mesh_patterns[i].interval =
- good_quality_mesh_patterns[speed][i].interval;
- }
- }
- } else {
- sf->allow_exhaustive_searches = 0;
- }
-
// Slow quant, dct and trellis not worthwhile for first pass
// so make sure they are always turned off.
if (oxcf->pass == 1) sf->optimize_coefficients = 0;
@@ -783,7 +798,6 @@
// and multiple threads match
if (cpi->oxcf.row_mt_bit_exact) {
sf->adaptive_rd_thresh = 0;
- sf->allow_exhaustive_searches = 0;
sf->adaptive_pred_interp_filter = 0;
}
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -325,14 +325,8 @@
// point for this motion search and limits the search range around it.
int adaptive_motion_search;
- // Flag for allowing some use of exhaustive searches;
- int allow_exhaustive_searches;
-
// Threshold for allowing exhaistive motion search.
int exhaustive_searches_thresh;
-
- // Maximum number of exhaustive searches for a frame.
- int max_exaustive_pct;
// Pattern to be used for any exhaustive mesh searches.
MESH_PATTERN mesh_patterns[MAX_MESH_STEP];