ref: df0d3a415216340a44953c6ed936bc2a4d7a1175
parent: f74e0229d5575af20498a9dbf48f1d305dfc8719
parent: c56ab7d0c6f3fb215d571db3dacc0cc908c1b53c
author: Sigrid Solveig Haflínudóttir <ftrvxmtrx@gmail.com>
date: Tue Oct 26 21:43:39 EDT 2021
Merge remote-tracking branch 'upstream/master'
--- a/.mailmap
+++ b/.mailmap
@@ -12,6 +12,8 @@
Elliott Karpilovsky <elliottk@google.com>
Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
Fyodor Kyslov <kyslov@google.com>
+Gregor Jasny <gjasny@gmail.com>
+Gregor Jasny <gjasny@gmail.com> <gjasny@googlemail.com>
Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
Hangyu Kuang <hkuang@google.com>
Hui Su <huisu@google.com>
--- a/AUTHORS
+++ b/AUTHORS
@@ -3,6 +3,7 @@
Aaron Watry <awatry@gmail.com>
Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
+Adam B. Goode <adam.mckee84@gmail.com>
Adrian Grange <agrange@google.com>
Ahmad Sharif <asharif@google.com>
Aidan Welch <aidansw@yahoo.com>
@@ -25,6 +26,7 @@
Aron Rosenberg <arosenberg@logitech.com>
Attila Nagy <attilanagy@google.com>
Birk Magnussen <birk.magnussen@googlemail.com>
+Bohan Li <bohanli@google.com>
Brian Foley <bpfoley@google.com>
Brion Vibber <bvibber@wikimedia.org>
changjun.yang <changjun.yang@intel.com>
@@ -34,9 +36,11 @@
chm <chm@rock-chips.com>
Chris Cunningham <chcunningham@chromium.org>
Christian Duvivier <cduvivier@google.com>
+Chunbo Hua <chunbo.hua@intel.com>
Clement Courbet <courbet@google.com>
Daniele Castagna <dcastagna@chromium.org>
Daniel Kang <ddkang@google.com>
+Daniel Sommermann <dcsommer@gmail.com>
Dan Zhu <zxdan@google.com>
Deb Mukherjee <debargha@google.com>
Deepa K G <deepa.kg@ittiam.com>
@@ -67,6 +71,7 @@
Harish Mahendrakar <harish.mahendrakar@ittiam.com>
Henrik Lundin <hlundin@google.com>
Hien Ho <hienho@google.com>
+Hirokazu Honda <hiroh@chromium.org>
Hui Su <huisu@google.com>
Ivan Krasin <krasin@chromium.org>
Ivan Maltz <ivanmaltz@google.com>
@@ -73,6 +78,7 @@
Jacek Caban <cjacek@gmail.com>
Jacky Chen <jackychen@google.com>
James Berry <jamesberry@google.com>
+James Touton <bekenn@gmail.com>
James Yu <james.yu@linaro.org>
James Zern <jzern@google.com>
Jan Gerber <j@mailb.org>
@@ -82,16 +88,20 @@
Jeff Faust <jfaust@google.com>
Jeff Muizelaar <jmuizelaar@mozilla.com>
Jeff Petkau <jpet@chromium.org>
+Jeremy Leconte <jleconte@google.com>
Jerome Jiang <jianj@google.com>
Jia Jia <jia.jia@linaro.org>
Jian Zhou <zhoujian@google.com>
Jim Bankoski <jimbankoski@google.com>
+jinbo <jinbo-hf@loongson.cn>
Jingning Han <jingning@google.com>
+Joel Fernandes <joelaf@google.com>
Joey Parrish <joeyparrish@google.com>
Johann Koenig <johannkoenig@google.com>
John Koleszar <jkoleszar@google.com>
Johnny Klonaris <google@jawknee.com>
John Stark <jhnstrk@gmail.com>
+Jonathan Wright <jonathan.wright@arm.com>
Jon Kunkee <jkunkee@microsoft.com>
Jorge E. Moreira <jemoreira@google.com>
Joshua Bleecher Snyder <josh@treelinelabs.com>
@@ -141,6 +151,7 @@
Peter Boström <pbos@chromium.org>
Peter Collingbourne <pcc@chromium.org>
Peter de Rivaz <peter.derivaz@gmail.com>
+Peter Kasting <pkasting@chromium.org>
Philip Jägenstedt <philipj@opera.com>
Priit Laes <plaes@plaes.org>
Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,59 @@
+2021-09-27 v1.11.0 "Smew Duck"
+ This maintenance release adds support for VBR mode in VP9 rate control
+ interface, new codec controls to get quantization parameters and loop filter
+ levels, and includes several improvements to NEON and numerous bug fixes.
+
+ - Upgrading:
+ New codec control is added to get quantization parameters and loop filter
+ levels.
+
+ VBR mode is supported in VP9 rate control library.
+
+ - Enhancement:
+ Numerous improvements for Neon optimizations.
+ Code clean-up and refactoring.
+ Calculation of rd multiplier is changed with BDRATE gains.
+
+ - Bug fixes:
+ Fix to overflow on duration.
+ Fix to several instances of -Wunused-but-set-variable.
+ Fix to avoid chroma resampling for 420mpeg2 input.
+ Fix to overflow in calc_iframe_target_size.
+ Fix to disallow skipping transform and quantization.
+ Fix some -Wsign-compare warnings in simple_encode.
+ Fix input file path in simple_encode_test.
+ Fix valid range for under/over_shoot pct.
+
+2021-03-09 v1.10.0 "Ruddy Duck"
+ This maintenance release adds support for darwin20 and new codec controls, as
+ well as numerous bug fixes.
+
+ - Upgrading:
+ New codec control is added to disable loopfilter for VP9.
+
+ New encoder control is added to disable feature to increase Q on overshoot
+ detection for CBR.
+
+ Configure support for darwin20 is added.
+
+ New codec control is added for VP9 rate control. The control ID of this
+ interface is VP9E_SET_EXTERNAL_RATE_CONTROL. To make VP9 use a customized
+ external rate control model, users will have to implement each callback
+ function in vpx_rc_funcs_t and register them using libvpx API
+ vpx_codec_control_() with the control ID.
+
+ - Enhancement:
+ Use -std=gnu++11 instead of -std=c++11 for c++ files.
+
+ - Bug fixes:
+ Override assembler with --as option of configure for MSVS.
+ Fix several compilation issues with gcc 4.8.5.
+ Fix to resetting rate control for temporal layers.
+ Fix to the rate control stats of SVC example encoder when number of spatial
+ layers is 1.
+ Fix to reusing motion vectors from the base spatial layer in SVC.
+ 2 pass related flags removed from SVC example encoder.
+
2020-07-29 v1.9.0 "Quacking Duck"
This release adds support for NV12, a separate library for rate control, as
well as incremental improvements.
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -19,10 +19,9 @@
All submissions, including submissions by project members, require review. We
use a [Gerrit](https://www.gerritcodereview.com) instance hosted at
-https://chromium-review.googlesource.com for this purpose.
-
-See https://www.webmproject.org/code/contribute/submitting-patches for an
-example of a typical gerrit workflow.
+https://chromium-review.googlesource.com for this purpose. See the
+[WebM Project page](https://www.webmproject.org/code/contribute/submitting-patches/)
+for additional details.
## Community Guidelines
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-README - 20 July 2020
+README - 08 March 2021
Welcome to the WebM VP8/VP9 Codec SDK!
@@ -10,14 +10,14 @@
1. Prerequisites
* All x86 targets require the Yasm[1] assembler be installed[2].
- * All Windows builds require that Cygwin[3] be installed.
- * Building the documentation requires Doxygen[4]. If you do not
+ * All Windows builds require that Cygwin[3] or MSYS2[4] be installed.
+ * Building the documentation requires Doxygen[5]. If you do not
have this package, the install-docs option will be disabled.
- * Downloading the data for the unit tests requires curl[5] and sha1sum.
+ * Downloading the data for the unit tests requires curl[6] and sha1sum.
sha1sum is provided via the GNU coreutils, installed by default on
many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
available, a compatible version of sha1sum can be built from
- source[6]. These requirements are optional if not running the unit
+ source[7]. These requirements are optional if not running the unit
tests.
[1]: http://www.tortall.net/projects/yasm
@@ -26,9 +26,10 @@
yasm-<version>-<arch>.exe to yasm.exe and place it in:
Program Files (x86)/Microsoft Visual Studio/2017/<level>/Common7/Tools/
[3]: http://www.cygwin.com
- [4]: http://www.doxygen.org
- [5]: http://curl.haxx.se
- [6]: http://www.microbrew.org/tools/md5sha1sum/
+ [4]: http://www.msys2.org/
+ [5]: http://www.doxygen.org
+ [6]: http://curl.haxx.se
+ [7]: http://www.microbrew.org/tools/md5sha1sum/
2. Out-of-tree builds
Out of tree builds are a supported method of building the application. For
--- a/args.c
+++ b/args.c
@@ -18,8 +18,10 @@
#include "vpx/vpx_integer.h"
#include "vpx_ports/msvc.h"
-#if defined(__GNUC__) && __GNUC__
-extern void die(const char *fmt, ...) __attribute__((noreturn));
+#if defined(__GNUC__)
+__attribute__((noreturn)) extern void die(const char *fmt, ...);
+#elif defined(_MSC_VER)
+__declspec(noreturn) extern void die(const char *fmt, ...);
#else
extern void die(const char *fmt, ...);
#endif
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -166,6 +166,9 @@
-I$(ASM_CNV_PATH)/libvpx
LOCAL_MODULE := libvpx
+LOCAL_LICENSE_KINDS := SPDX-license-identifier-BSD
+LOCAL_LICENSE_CONDITIONS := notice
+LOCAL_NOTICE_FILE := $(LOCAL_PATH)/../../LICENSE $(LOCAL_PATH)/../../PATENTS
ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
LOCAL_STATIC_LIBRARIES := cpufeatures
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1296,10 +1296,6 @@
enabled optimizations && disabled gprof && check_add_cflags -fomit-frame-pointer
;;
vs*)
- # When building with Microsoft Visual Studio the assembler is
- # invoked directly. Checking at configure time is unnecessary.
- # Skip the check by setting AS arbitrarily
- AS=msvs
msvs_arch_dir=x86-msvs
case ${tgt_cc##vs} in
14)
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -157,6 +157,8 @@
;;
--lib) proj_kind="lib"
;;
+ --as=*) as="${optval}"
+ ;;
--src-path-bare=*)
src_path_bare=$(fix_path "$optval")
src_path_bare=${src_path_bare%/}
@@ -247,13 +249,13 @@
case "$target" in
x86_64*)
platforms[0]="x64"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} "%(FullPath)""
- asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} "%(FullPath)""
+ asm_Debug_cmdline="${as} -Xvc -gcv8 -f win64 ${yasmincs} "%(FullPath)""
+ asm_Release_cmdline="${as} -Xvc -f win64 ${yasmincs} "%(FullPath)""
;;
x86*)
platforms[0]="Win32"
- asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} "%(FullPath)""
- asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "%(FullPath)""
+ asm_Debug_cmdline="${as} -Xvc -gcv8 -f win32 ${yasmincs} "%(FullPath)""
+ asm_Release_cmdline="${as} -Xvc -f win32 ${yasmincs} "%(FullPath)""
;;
arm64*)
platforms[0]="ARM64"
--- a/build/make/msvs_common.sh
+++ b/build/make/msvs_common.sh
@@ -9,7 +9,8 @@
## be found in the AUTHORS file in the root of the source tree.
##
-if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \
+shell_name="$(uname -o 2>/dev/null)"
+if [[ "$shell_name" = "Cygwin" || "$shell_name" = "Msys" ]] \
&& cygpath --help >/dev/null 2>&1; then
FIXPATH='cygpath -m'
else
--- a/configure
+++ b/configure
@@ -731,17 +731,17 @@
soft_enable libyuv
;;
*-android-*)
- check_add_cxxflags -std=c++11 && soft_enable webm_io
+ check_add_cxxflags -std=gnu++11 && soft_enable webm_io
soft_enable libyuv
# GTestLog must be modified to use Android logging utilities.
;;
*-darwin-*)
- check_add_cxxflags -std=c++11
+ check_add_cxxflags -std=gnu++11
# iOS/ARM builds do not work with gtest. This does not match
# x86 targets.
;;
*-iphonesimulator-*)
- check_add_cxxflags -std=c++11 && soft_enable webm_io
+ check_add_cxxflags -std=gnu++11 && soft_enable webm_io
soft_enable libyuv
;;
*-win*)
@@ -748,7 +748,7 @@
# Some mingw toolchains don't have pthread available by default.
# Treat these more like visual studio where threading in gtest
# would be disabled for the same reason.
- check_add_cxxflags -std=c++11 && soft_enable unit_tests \
+ check_add_cxxflags -std=gnu++11 && soft_enable unit_tests \
&& soft_enable webm_io
check_cxx "$@" <<EOF && soft_enable libyuv
int z;
@@ -755,9 +755,9 @@
EOF
;;
*)
- enabled pthread_h && check_add_cxxflags -std=c++11 \
+ enabled pthread_h && check_add_cxxflags -std=gnu++11 \
&& soft_enable unit_tests
- check_add_cxxflags -std=c++11 && soft_enable webm_io
+ check_add_cxxflags -std=gnu++11 && soft_enable webm_io
check_cxx "$@" <<EOF && soft_enable libyuv
int z;
EOF
--- a/examples.mk
+++ b/examples.mk
@@ -376,6 +376,7 @@
--ver=$$(CONFIG_VS_VERSION)\
--proj-guid=$$($$(@:.$(VCPROJ_SFX)=).GUID)\
--src-path-bare="$(SRC_PATH_BARE)" \
+ --as=$$(AS) \
$$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \
--out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \
$$(INTERNAL_LDFLAGS) $$(LDFLAGS) -l$$(CODEC_LIB) $$^
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -66,12 +66,6 @@
ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
static const arg_def_t scale_factors_arg =
ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
-static const arg_def_t passes_arg =
- ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
-static const arg_def_t pass_arg =
- ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
-static const arg_def_t fpf_name_arg =
- ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
static const arg_def_t min_q_arg =
ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
static const arg_def_t max_q_arg =
@@ -125,9 +119,6 @@
&spatial_layers_arg,
&kf_dist_arg,
&scale_factors_arg,
- &passes_arg,
- &pass_arg,
- &fpf_name_arg,
&min_q_arg,
&max_q_arg,
&min_bitrate_arg,
@@ -173,8 +164,6 @@
uint32_t frames_to_skip;
struct VpxInputContext input_ctx;
stats_io_t rc_stats;
- int passes;
- int pass;
int tune_content;
int inter_layer_pred;
} AppInput;
@@ -197,9 +186,6 @@
char **argi = NULL;
char **argj = NULL;
vpx_codec_err_t res;
- int passes = 0;
- int pass = 0;
- const char *fpf_file_name = NULL;
unsigned int min_bitrate = 0;
unsigned int max_bitrate = 0;
char string_options[1024] = { 0 };
@@ -289,18 +275,6 @@
sizeof(string_options) - strlen(string_options) - 1);
strncat(string_options, arg.val,
sizeof(string_options) - strlen(string_options) - 1);
- } else if (arg_match(&arg, &passes_arg, argi)) {
- passes = arg_parse_uint(&arg);
- if (passes < 1 || passes > 2) {
- die("Error: Invalid number of passes (%d)\n", passes);
- }
- } else if (arg_match(&arg, &pass_arg, argi)) {
- pass = arg_parse_uint(&arg);
- if (pass < 1 || pass > 2) {
- die("Error: Invalid pass selected (%d)\n", pass);
- }
- } else if (arg_match(&arg, &fpf_name_arg, argi)) {
- fpf_file_name = arg.val;
} else if (arg_match(&arg, &min_q_arg, argi)) {
strncat(string_options, " min-quantizers=",
sizeof(string_options) - strlen(string_options) - 1);
@@ -355,36 +329,8 @@
if (strlen(string_options) > 0)
vpx_svc_set_options(svc_ctx, string_options + 1);
- if (passes == 0 || passes == 1) {
- if (pass) {
- fprintf(stderr, "pass is ignored since there's only one pass\n");
- }
- enc_cfg->g_pass = VPX_RC_ONE_PASS;
- } else {
- if (pass == 0) {
- die("pass must be specified when passes is 2\n");
- }
+ enc_cfg->g_pass = VPX_RC_ONE_PASS;
- if (fpf_file_name == NULL) {
- die("fpf must be specified when passes is 2\n");
- }
-
- if (pass == 1) {
- enc_cfg->g_pass = VPX_RC_FIRST_PASS;
- if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
- fatal("Failed to open statistics store");
- }
- } else {
- enc_cfg->g_pass = VPX_RC_LAST_PASS;
- if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
- fatal("Failed to open statistics store");
- }
- enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);
- }
- app_input->passes = passes;
- app_input->pass = pass;
- }
-
if (enc_cfg->rc_target_bitrate > 0) {
if (min_bitrate > 0) {
enc_cfg->rc_2pass_vbr_minsection_pct =
@@ -1004,13 +950,11 @@
info.time_base.numerator = enc_cfg.g_timebase.num;
info.time_base.denominator = enc_cfg.g_timebase.den;
- if (!(app_input.passes == 2 && app_input.pass == 1)) {
- // We don't save the bitstream for the 1st pass on two pass rate control
- writer =
- vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
- if (!writer)
- die("Failed to open %s for writing\n", app_input.output_filename);
- }
+ writer =
+ vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info);
+ if (!writer)
+ die("Failed to open %s for writing\n", app_input.output_filename);
+
#if OUTPUT_RC_STATS
// Write out spatial layer stream.
// TODO(marpan/jianj): allow for writing each spatial and temporal stream.
@@ -1230,7 +1174,6 @@
#endif
if (vpx_codec_destroy(&encoder))
die_codec(&encoder, "Failed to destroy codec");
- if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1);
if (writer) {
vpx_video_writer_close(writer);
}
--- a/examples/vpx_dec_fuzzer.cc
+++ b/examples/vpx_dec_fuzzer.cc
@@ -41,7 +41,7 @@
$make -j32
* Build vp9 fuzzer
- $ $CXX $CXXFLAGS -std=c++11 -DDECODER=vp9 \
+ $ $CXX $CXXFLAGS -std=gnu++11 -DDECODER=vp9 \
-fsanitize=fuzzer -I../libvpx -I. -Wl,--start-group \
../libvpx/examples/vpx_dec_fuzzer.cc -o ./vpx_dec_fuzzer_vp9 \
./libvpx.a -Wl,--end-group
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -831,6 +831,7 @@
} else if (strncmp(encoder->name, "vp9", 3) == 0) {
vpx_svc_extra_cfg_t svc_params;
memset(&svc_params, 0, sizeof(svc_params));
+ vpx_codec_control(&codec, VP9E_SET_POSTENCODE_DROP, 0);
vpx_codec_control(&codec, VP9E_SET_DISABLE_OVERSHOOT_MAXQ_CBR, 0);
vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
@@ -930,6 +931,7 @@
// Update for short-time encoding bitrate states, for moving window
// of size rc->window, shifted by rc->window / 2.
// Ignore first window segment, due to key frame.
+ if (rc.window_size == 0) rc.window_size = 15;
if (frame_cnt > rc.window_size) {
sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
if (frame_cnt % rc.window_size == 0) {
--- a/libs.mk
+++ b/libs.mk
@@ -94,15 +94,28 @@
INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h vpx/vpx_ext_ratectrl.h
CODEC_DOC_SECTIONS += vp9 vp9_encoder
+endif
- RC_RTC_SRCS := $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))
- RC_RTC_SRCS += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
- RC_RTC_SRCS += vpx/vpx_ext_ratectrl.h
+RC_RTC_SRCS := vpx/vp8.h vpx/vp8cx.h
+RC_RTC_SRCS += vpx/vpx_ext_ratectrl.h
+RC_RTC_SRCS += vpx/internal/vpx_ratectrl_rtc.h
+ifeq ($(CONFIG_VP9_ENCODER),yes)
+ VP9_PREFIX=vp9/
+ RC_RTC_SRCS += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS))
+ RC_RTC_SRCS += $(VP9_PREFIX)vp9cx.mk
RC_RTC_SRCS += $(VP9_PREFIX)ratectrl_rtc.cc
RC_RTC_SRCS += $(VP9_PREFIX)ratectrl_rtc.h
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP9_PREFIX)ratectrl_rtc.cc
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP9_PREFIX)ratectrl_rtc.h
endif
+ifeq ($(CONFIG_VP8_ENCODER),yes)
+ VP8_PREFIX=vp8/
+ RC_RTC_SRCS += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS))
+ RC_RTC_SRCS += $(VP8_PREFIX)vp8_ratectrl_rtc.cc
+ RC_RTC_SRCS += $(VP8_PREFIX)vp8_ratectrl_rtc.h
+ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP8_PREFIX)vp8_ratectrl_rtc.cc
+ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(VP8_PREFIX)vp8_ratectrl_rtc.h
+endif
ifeq ($(CONFIG_VP9_DECODER),yes)
VP9_PREFIX=vp9/
@@ -126,7 +139,7 @@
ifeq ($(CONFIG_MSVS),yes)
CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
GTEST_LIB=$(if $(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd)
-RC_RTC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vp9rcmt,vp9rcmd)
+RC_RTC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxrcmt,vpxrcmd)
# This variable uses deferred expansion intentionally, since the results of
# $(wildcard) may change during the course of the Make.
VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d))))
@@ -232,6 +245,7 @@
--ver=$(CONFIG_VS_VERSION) \
--src-path-bare="$(SRC_PATH_BARE)" \
--out=$@ $(CFLAGS) \
+ --as=$(AS) \
$(filter $(SRC_PATH_BARE)/vp8/%.c, $(VCPROJ_SRCS)) \
$(filter $(SRC_PATH_BARE)/vp8/%.h, $(VCPROJ_SRCS)) \
$(filter $(SRC_PATH_BARE)/vp9/%.c, $(VCPROJ_SRCS)) \
@@ -248,20 +262,21 @@
vpx.$(VCPROJ_SFX): vpx_config.asm
vpx.$(VCPROJ_SFX): $(RTCD)
-vp9rc.$(VCPROJ_SFX): \
+vpxrc.$(VCPROJ_SFX): \
VCPROJ_SRCS=$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^)
-vp9rc.$(VCPROJ_SFX): $(RC_RTC_SRCS)
+vpxrc.$(VCPROJ_SFX): $(RC_RTC_SRCS)
@echo " [CREATE] $@"
$(qexec)$(GEN_VCPROJ) \
$(if $(CONFIG_SHARED),--dll,--lib) \
--target=$(TOOLCHAIN) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
- --name=vp9rc \
+ --name=vpxrc \
--proj-guid=C26FF952-9494-4838-9A3F-7F3D4F613385 \
--ver=$(CONFIG_VS_VERSION) \
--src-path-bare="$(SRC_PATH_BARE)" \
--out=$@ $(CFLAGS) \
+ --as=$(AS) \
$(filter $(SRC_PATH_BARE)/vp9/%.c, $(VCPROJ_SRCS)) \
$(filter $(SRC_PATH_BARE)/vp9/%.cc, $(VCPROJ_SRCS)) \
$(filter $(SRC_PATH_BARE)/vp9/%.h, $(VCPROJ_SRCS)) \
@@ -273,10 +288,10 @@
$(VCPROJ_SRCS)) \
--src-path-bare="$(SRC_PATH_BARE)" \
-PROJECTS-yes += vp9rc.$(VCPROJ_SFX)
+PROJECTS-yes += vpxrc.$(VCPROJ_SFX)
-vp9rc.$(VCPROJ_SFX): vpx_config.asm
-vp9rc.$(VCPROJ_SFX): $(RTCD)
+vpxrc.$(VCPROJ_SFX): vpx_config.asm
+vpxrc.$(VCPROJ_SFX): $(RTCD)
endif # ifeq ($(CONFIG_MSVS),yes)
else # ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
@@ -285,8 +300,20 @@
LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
$(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
-SO_VERSION_MAJOR := 6
-SO_VERSION_MINOR := 3
+# Updating version info.
+# https://www.gnu.org/software/libtool/manual/libtool.html#Updating-version-info
+# For libtool: c=<current>, a=<age>, r=<revision>
+# libtool generates .so file as .so.[c-a].a.r, while -version-info c:r:a is
+# passed to libtool.
+#
+# libvpx library file is generated as libvpx.so.<MAJOR>.<MINOR>.<PATCH>
+# MAJOR = c-a, MINOR = a, PATCH = r
+#
+# To determine SO_VERSION_{MAJOR,MINOR,PATCH}, calculate c,a,r with current
+# SO_VERSION_* then follow the rules in the link to determine the new version
+# (c1, a1, r1) and set MAJOR to [c1-a1], MINOR to a1 and PATCH to r1
+SO_VERSION_MAJOR := 7
+SO_VERSION_MINOR := 0
SO_VERSION_PATCH := 0
ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib
@@ -384,12 +411,11 @@
INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
CLEAN-OBJS += vpx.pc
-ifeq ($(CONFIG_VP9_ENCODER),yes)
+ifeq ($(CONFIG_ENCODERS),yes)
RC_RTC_OBJS=$(call objs,$(RC_RTC_SRCS))
- RC_RTC_OBJS=$(call objs,$(RC_RTC_SRCS))
OBJS-yes += $(RC_RTC_OBJS)
- LIBS-yes += $(BUILD_PFX)libvp9rc.a $(BUILD_PFX)libvp9rc_g.a
- $(BUILD_PFX)libvp9rc_g.a: $(RC_RTC_OBJS)
+ LIBS-yes += $(BUILD_PFX)libvpxrc.a $(BUILD_PFX)libvpxrc_g.a
+ $(BUILD_PFX)libvpxrc_g.a: $(RC_RTC_OBJS)
endif
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_RATE_CTRL),yesyes)
@@ -479,10 +505,12 @@
$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
+ifeq ($(CONFIG_ENCODERS),yes)
RC_INTERFACE_TEST_BIN=./test_rc_interface$(EXE_SFX)
RC_INTERFACE_TEST_SRCS=$(call addprefix_clean,test/,\
$(call enabled,RC_INTERFACE_TEST_SRCS))
RC_INTERFACE_TEST_OBJS := $(sort $(call objs,$(RC_INTERFACE_TEST_SRCS)))
+endif
SIMPLE_ENCODE_TEST_BIN=./test_simple_encode$(EXE_SFX)
SIMPLE_ENCODE_TEST_SRCS=$(call addprefix_clean,test/,\
@@ -536,6 +564,7 @@
--proj-guid=EC00E1EC-AF68-4D92-A255-181690D1C9B1 \
--ver=$(CONFIG_VS_VERSION) \
--src-path-bare="$(SRC_PATH_BARE)" \
+ --as=$(AS) \
-D_VARIADIC_MAX=10 \
--out=gtest.$(VCPROJ_SFX) $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" -I"$(SRC_PATH_BARE)/third_party/googletest/src"
@@ -552,6 +581,7 @@
--proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
--ver=$(CONFIG_VS_VERSION) \
--src-path-bare="$(SRC_PATH_BARE)" \
+ --as=$(AS) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
--out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
@@ -574,6 +604,7 @@
--proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
--ver=$(CONFIG_VS_VERSION) \
--src-path-bare="$(SRC_PATH_BARE)" \
+ --as=$(AS) \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
--out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
@@ -580,10 +611,11 @@
-L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
endif # TEST_INTRA_PRED_SPEED
+ifeq ($(CONFIG_ENCODERS),yes)
ifneq ($(strip $(RC_INTERFACE_TEST_OBJS)),)
PROJECTS-$(CONFIG_MSVS) += test_rc_interface.$(VCPROJ_SFX)
test_rc_interface.$(VCPROJ_SFX): $(RC_INTERFACE_TEST_SRCS) vpx.$(VCPROJ_SFX) \
- vp9rc.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
+ vpxrc.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
@echo " [CREATE] $@"
$(qexec)$(GEN_VCPROJ) \
--exe \
@@ -592,6 +624,7 @@
-D_VARIADIC_MAX=10 \
--proj-guid=30458F88-1BC6-4689-B41C-50F3737AAB27 \
--ver=$(CONFIG_VS_VERSION) \
+ --as=$(AS) \
--src-path-bare="$(SRC_PATH_BARE)" \
$(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
--out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
@@ -598,6 +631,7 @@
-I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
-L. -l$(CODEC_LIB) -l$(RC_RTC_LIB) -l$(GTEST_LIB) $^
endif # RC_INTERFACE_TEST
+endif # CONFIG_ENCODERS
endif
else
@@ -639,6 +673,7 @@
-L. -lvpx -lgtest $(extralibs) -lm))
endif # TEST_INTRA_PRED_SPEED
+ifeq ($(CONFIG_ENCODERS),yes)
ifneq ($(strip $(RC_INTERFACE_TEST_OBJS)),)
$(RC_INTERFACE_TEST_OBJS) $(RC_INTERFACE_TEST_OBJS:.o=.d): \
CXXFLAGS += $(GTEST_INCLUDES)
@@ -645,11 +680,12 @@
OBJS-yes += $(RC_INTERFACE_TEST_OBJS)
BINS-yes += $(RC_INTERFACE_TEST_BIN)
-$(RC_INTERFACE_TEST_BIN): $(TEST_LIBS) libvp9rc.a
+$(RC_INTERFACE_TEST_BIN): $(TEST_LIBS) libvpxrc.a
$(eval $(call linkerxx_template,$(RC_INTERFACE_TEST_BIN), \
$(RC_INTERFACE_TEST_OBJS) \
- -L. -lvpx -lgtest -lvp9rc $(extralibs) -lm))
+ -L. -lvpx -lgtest -lvpxrc $(extralibs) -lm))
endif # RC_INTERFACE_TEST
+endif # CONFIG_ENCODERS
ifneq ($(strip $(SIMPLE_ENCODE_TEST_OBJS)),)
$(SIMPLE_ENCODE_TEST_OBJS) $(SIMPLE_ENCODE_TEST_OBJS:.o=.d): \
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -34,6 +34,9 @@
LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/include/
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/include/
LOCAL_SRC_FILES := ./third_party/googletest/src/src/gtest-all.cc
+LOCAL_LICENSE_KINDS := SPDX-license-identifier-BSD
+LOCAL_LICENSE_CONDITIONS := notice
+LOCAL_NOTICE_FILE := $(LOCAL_PATH)/../../LICENSE $(LOCAL_PATH)/../../PATENTS
include $(BUILD_STATIC_LIBRARY)
#libvpx_test
@@ -48,6 +51,9 @@
LOCAL_STATIC_LIBRARIES += vpx
endif
+LOCAL_LICENSE_KINDS := SPDX-license-identifier-BSD
+LOCAL_LICENSE_CONDITIONS := notice
+LOCAL_NOTICE_FILE := $(LOCAL_PATH)/../../LICENSE $(LOCAL_PATH)/../../PATENTS
include $(LOCAL_PATH)/test/test.mk
LOCAL_C_INCLUDES := $(BINDINGS_DIR)
FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@@ -8,6 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <climits>
+#include <cstring>
+
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
@@ -18,6 +21,12 @@
#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
+bool IsVP9(const vpx_codec_iface_t *iface) {
+ static const char kVP9Name[] = "WebM Project VP9";
+ return strncmp(kVP9Name, vpx_codec_iface_name(iface), sizeof(kVP9Name) - 1) ==
+ 0;
+}
+
TEST(EncodeAPI, InvalidParams) {
static const vpx_codec_iface_t *kCodecs[] = {
#if CONFIG_VP8_ENCODER
@@ -184,15 +193,120 @@
}
// VP9 should report incapable, VP8 invalid for all configurations.
- const char kVP9Name[] = "WebM Project VP9";
- const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),
- sizeof(kVP9Name) - 1) == 0;
- EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,
+ EXPECT_EQ(IsVP9(iface) ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,
vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0]));
for (int i = 0; i < 2; i++) {
vpx_codec_destroy(&enc[i]);
}
+ }
+}
+
+TEST(EncodeAPI, SetRoi) {
+ static struct {
+ const vpx_codec_iface_t *iface;
+ int ctrl_id;
+ } kCodecs[] = {
+#if CONFIG_VP8_ENCODER
+ { &vpx_codec_vp8_cx_algo, VP8E_SET_ROI_MAP },
+#endif
+#if CONFIG_VP9_ENCODER
+ { &vpx_codec_vp9_cx_algo, VP9E_SET_ROI_MAP },
+#endif
+ };
+ constexpr int kWidth = 64;
+ constexpr int kHeight = 64;
+
+ for (const auto &codec : kCodecs) {
+ SCOPED_TRACE(vpx_codec_iface_name(codec.iface));
+ vpx_codec_ctx_t enc;
+ vpx_codec_enc_cfg_t cfg;
+
+ EXPECT_EQ(vpx_codec_enc_config_default(codec.iface, &cfg, 0), VPX_CODEC_OK);
+ cfg.g_w = kWidth;
+ cfg.g_h = kHeight;
+ EXPECT_EQ(vpx_codec_enc_init(&enc, codec.iface, &cfg, 0), VPX_CODEC_OK);
+
+ vpx_roi_map_t roi = {};
+ uint8_t roi_map[kWidth * kHeight] = {};
+ if (IsVP9(codec.iface)) {
+ roi.rows = (cfg.g_h + 7) >> 3;  // rows cover the frame height
+ roi.cols = (cfg.g_w + 7) >> 3;  // cols cover the frame width
+ } else {
+ roi.rows = (cfg.g_h + 15) >> 4;  // rows cover the frame height
+ roi.cols = (cfg.g_w + 15) >> 4;  // cols cover the frame width
+ }
+ EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK);
+
+ roi.roi_map = roi_map;
+ // VP8 only. This value isn't range checked.
+ roi.static_threshold[1] = 1000;
+ roi.static_threshold[2] = INT_MIN;
+ roi.static_threshold[3] = INT_MAX;
+
+ for (const auto delta : { -63, -1, 0, 1, 63 }) {
+ for (int i = 0; i < 8; ++i) {
+ roi.delta_q[i] = delta;
+ roi.delta_lf[i] = delta;
+ // VP9 only.
+ roi.skip[i] ^= 1;
+ roi.ref_frame[i] = (roi.ref_frame[i] + 1) % 4;
+ EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK);
+ }
+ }
+
+ vpx_codec_err_t expected_error;
+ for (const auto delta : { -64, 64, INT_MIN, INT_MAX }) {
+ expected_error = VPX_CODEC_INVALID_PARAM;
+ for (int i = 0; i < 8; ++i) {
+ roi.delta_q[i] = delta;
+ // The max segment count for VP8 is 4, the remainder of the entries are
+ // ignored.
+ if (i >= 4 && !IsVP9(codec.iface)) expected_error = VPX_CODEC_OK;
+
+ EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)
+ << "delta_q[" << i << "]: " << delta;
+ roi.delta_q[i] = 0;
+
+ roi.delta_lf[i] = delta;
+ EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)
+ << "delta_lf[" << i << "]: " << delta;
+ roi.delta_lf[i] = 0;
+ }
+ }
+
+ // VP8 should ignore skip[] and ref_frame[] values.
+ expected_error =
+ IsVP9(codec.iface) ? VPX_CODEC_INVALID_PARAM : VPX_CODEC_OK;
+ for (const auto skip : { -2, 2, INT_MIN, INT_MAX }) {
+ for (int i = 0; i < 8; ++i) {
+ roi.skip[i] = skip;
+ EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)
+ << "skip[" << i << "]: " << skip;
+ roi.skip[i] = 0;
+ }
+ }
+
+ // VP9 allows negative values to be used to disable segmentation.
+ for (int ref_frame = -3; ref_frame < 0; ++ref_frame) {
+ for (int i = 0; i < 8; ++i) {
+ roi.ref_frame[i] = ref_frame;
+ EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), VPX_CODEC_OK)
+ << "ref_frame[" << i << "]: " << ref_frame;
+ roi.ref_frame[i] = 0;
+ }
+ }
+
+ for (const auto ref_frame : { 4, INT_MIN, INT_MAX }) {
+ for (int i = 0; i < 8; ++i) {
+ roi.ref_frame[i] = ref_frame;
+ EXPECT_EQ(vpx_codec_control_(&enc, codec.ctrl_id, &roi), expected_error)
+ << "ref_frame[" << i << "]: " << ref_frame;
+ roi.ref_frame[i] = 0;
+ }
+ }
+
+ EXPECT_EQ(vpx_codec_destroy(&enc), VPX_CODEC_OK);
}
}
--- a/test/ratectrl_rtc_test.cc
+++ /dev/null
@@ -1,229 +1,0 @@
-/*
- * Copyright (c) 2020 The WebM project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-#include "vp9/ratectrl_rtc.h"
-
-#include <fstream> // NOLINT
-#include <string>
-
-#include "./vpx_config.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/video_source.h"
-#include "vpx/vpx_codec.h"
-#include "vpx_ports/bitops.h"
-
-namespace {
-
-const size_t kNumFrame = 850;
-
-struct FrameInfo {
- friend std::istream &operator>>(std::istream &is, FrameInfo &info) {
- is >> info.frame_id >> info.spatial_id >> info.temporal_id >> info.base_q >>
- info.target_bandwidth >> info.buffer_level >> info.filter_level_ >>
- info.bytes_used;
- return is;
- }
- int frame_id;
- int spatial_id;
- int temporal_id;
- // Base QP
- int base_q;
- size_t target_bandwidth;
- size_t buffer_level;
- // Loopfilter level
- int filter_level_;
- // Frame size for current frame, used for pose encode update
- size_t bytes_used;
-};
-
-// This test runs the rate control interface and compare against ground truth
-// generated by encoders.
-// Settings for the encoder:
-// For 1 layer:
-//
-// examples/vpx_temporal_svc_encoder gipsrec_motion1.1280_720.yuv out vp9
-// 1280 720 1 30 7 0 0 1 0 1000
-//
-// For SVC (3 temporal layers, 3 spatial layers):
-//
-// examples/vp9_spatial_svc_encoder -f 10000 -w 1280 -h 720 -t 1/30 -sl 3
-// -k 10000 -bl 100,140,200,250,350,500,450,630,900 -b 1600 --rc-end-usage=1
-// --lag-in-frames=0 --passes=1 --speed=7 --threads=1
-// --temporal-layering-mode=3 -aq 1 -rcstat 1
-// gipsrec_motion1.1280_720.yuv -o out.webm
-//
-// - AQ_Mode 0
-// - Disable golden refresh
-// - Bitrate x 2 at frame/superframe 200
-// - Bitrate / 4 at frame/superframe 400
-//
-// The generated file includes:
-// frame number, spatial layer ID, temporal layer ID, base QP, target
-// bandwidth, buffer level, loopfilter level, encoded frame size
-// TODO(jianj): Remove golden files, and run actual encoding in this test.
-class RcInterfaceTest : public ::testing::Test {
- public:
- explicit RcInterfaceTest() {}
-
- virtual ~RcInterfaceTest() {}
-
- protected:
- void RunOneLayer() {
- SetConfigOneLayer();
- rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);
- FrameInfo frame_info;
- libvpx::VP9FrameParamsQpRTC frame_params;
- frame_params.frame_type = KEY_FRAME;
- frame_params.spatial_layer_id = 0;
- frame_params.temporal_layer_id = 0;
- std::ifstream one_layer_file;
- one_layer_file.open(libvpx_test::GetDataPath() +
- "/rc_interface_test_one_layer");
- ASSERT_TRUE(one_layer_file.good());
- for (size_t i = 0; i < kNumFrame; i++) {
- one_layer_file >> frame_info;
- if (frame_info.frame_id > 0) frame_params.frame_type = INTER_FRAME;
- if (frame_info.frame_id == 200) {
- rc_cfg_.target_bandwidth = rc_cfg_.target_bandwidth * 2;
- rc_api_->UpdateRateControl(rc_cfg_);
- } else if (frame_info.frame_id == 400) {
- rc_cfg_.target_bandwidth = rc_cfg_.target_bandwidth / 4;
- rc_api_->UpdateRateControl(rc_cfg_);
- }
- ASSERT_EQ(frame_info.spatial_id, 0);
- ASSERT_EQ(frame_info.temporal_id, 0);
- rc_api_->ComputeQP(frame_params);
- ASSERT_EQ(rc_api_->GetQP(), frame_info.base_q);
- ASSERT_EQ(rc_api_->GetLoopfilterLevel(), frame_info.filter_level_);
- rc_api_->PostEncodeUpdate(frame_info.bytes_used);
- }
- }
-
- void RunSVC() {
- SetConfigSVC();
- rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);
- FrameInfo frame_info;
- libvpx::VP9FrameParamsQpRTC frame_params;
- frame_params.frame_type = KEY_FRAME;
- std::ifstream svc_file;
- svc_file.open(std::string(std::getenv("LIBVPX_TEST_DATA_PATH")) +
- "/rc_interface_test_svc");
- ASSERT_TRUE(svc_file.good());
- for (size_t i = 0; i < kNumFrame * rc_cfg_.ss_number_layers; i++) {
- svc_file >> frame_info;
- if (frame_info.frame_id > 0) frame_params.frame_type = INTER_FRAME;
- if (frame_info.frame_id == 200 * rc_cfg_.ss_number_layers) {
- for (int layer = 0;
- layer < rc_cfg_.ss_number_layers * rc_cfg_.ts_number_layers;
- layer++)
- rc_cfg_.layer_target_bitrate[layer] *= 2;
- rc_cfg_.target_bandwidth *= 2;
- rc_api_->UpdateRateControl(rc_cfg_);
- } else if (frame_info.frame_id == 400 * rc_cfg_.ss_number_layers) {
- for (int layer = 0;
- layer < rc_cfg_.ss_number_layers * rc_cfg_.ts_number_layers;
- layer++)
- rc_cfg_.layer_target_bitrate[layer] /= 4;
- rc_cfg_.target_bandwidth /= 4;
- rc_api_->UpdateRateControl(rc_cfg_);
- }
- frame_params.spatial_layer_id = frame_info.spatial_id;
- frame_params.temporal_layer_id = frame_info.temporal_id;
- rc_api_->ComputeQP(frame_params);
- ASSERT_EQ(rc_api_->GetQP(), frame_info.base_q);
- ASSERT_EQ(rc_api_->GetLoopfilterLevel(), frame_info.filter_level_);
- rc_api_->PostEncodeUpdate(frame_info.bytes_used);
- }
- }
-
- private:
- void SetConfigOneLayer() {
- rc_cfg_.width = 1280;
- rc_cfg_.height = 720;
- rc_cfg_.max_quantizer = 52;
- rc_cfg_.min_quantizer = 2;
- rc_cfg_.target_bandwidth = 1000;
- rc_cfg_.buf_initial_sz = 600;
- rc_cfg_.buf_optimal_sz = 600;
- rc_cfg_.buf_sz = 1000;
- rc_cfg_.undershoot_pct = 50;
- rc_cfg_.overshoot_pct = 50;
- rc_cfg_.max_intra_bitrate_pct = 1000;
- rc_cfg_.framerate = 30.0;
- rc_cfg_.ss_number_layers = 1;
- rc_cfg_.ts_number_layers = 1;
- rc_cfg_.scaling_factor_num[0] = 1;
- rc_cfg_.scaling_factor_den[0] = 1;
- rc_cfg_.layer_target_bitrate[0] = 1000;
- rc_cfg_.max_quantizers[0] = 52;
- rc_cfg_.min_quantizers[0] = 2;
- }
-
- void SetConfigSVC() {
- rc_cfg_.width = 1280;
- rc_cfg_.height = 720;
- rc_cfg_.max_quantizer = 56;
- rc_cfg_.min_quantizer = 2;
- rc_cfg_.target_bandwidth = 1600;
- rc_cfg_.buf_initial_sz = 500;
- rc_cfg_.buf_optimal_sz = 600;
- rc_cfg_.buf_sz = 1000;
- rc_cfg_.undershoot_pct = 50;
- rc_cfg_.overshoot_pct = 50;
- rc_cfg_.max_intra_bitrate_pct = 900;
- rc_cfg_.framerate = 30.0;
- rc_cfg_.ss_number_layers = 3;
- rc_cfg_.ts_number_layers = 3;
-
- rc_cfg_.scaling_factor_num[0] = 1;
- rc_cfg_.scaling_factor_den[0] = 4;
- rc_cfg_.scaling_factor_num[1] = 2;
- rc_cfg_.scaling_factor_den[1] = 4;
- rc_cfg_.scaling_factor_num[2] = 4;
- rc_cfg_.scaling_factor_den[2] = 4;
-
- rc_cfg_.ts_rate_decimator[0] = 4;
- rc_cfg_.ts_rate_decimator[1] = 2;
- rc_cfg_.ts_rate_decimator[2] = 1;
-
- rc_cfg_.layer_target_bitrate[0] = 100;
- rc_cfg_.layer_target_bitrate[1] = 140;
- rc_cfg_.layer_target_bitrate[2] = 200;
- rc_cfg_.layer_target_bitrate[3] = 250;
- rc_cfg_.layer_target_bitrate[4] = 350;
- rc_cfg_.layer_target_bitrate[5] = 500;
- rc_cfg_.layer_target_bitrate[6] = 450;
- rc_cfg_.layer_target_bitrate[7] = 630;
- rc_cfg_.layer_target_bitrate[8] = 900;
-
- for (int sl = 0; sl < rc_cfg_.ss_number_layers; ++sl) {
- for (int tl = 0; tl < rc_cfg_.ts_number_layers; ++tl) {
- const int i = sl * rc_cfg_.ts_number_layers + tl;
- rc_cfg_.max_quantizers[i] = 56;
- rc_cfg_.min_quantizers[i] = 2;
- }
- }
- }
-
- std::unique_ptr<libvpx::VP9RateControlRTC> rc_api_;
- libvpx::VP9RateControlRtcConfig rc_cfg_;
-};
-
-TEST_F(RcInterfaceTest, OneLayer) { RunOneLayer(); }
-
-TEST_F(RcInterfaceTest, SVC) { RunSVC(); }
-} // namespace
-
-int main(int argc, char **argv) {
- ::testing::InitGoogleTest(&argc, argv);
- return RUN_ALL_TESTS();
-}
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -26,6 +26,10 @@
#include "vpx_ports/msvc.h"
#include "vpx_ports/vpx_timer.h"
+// const[expr] should be sufficient for DECLARE_ALIGNED but early
+// implementations of c++11 appear to have some issues with it.
+#define kDataAlignment 32
+
template <typename Function>
struct TestParams {
TestParams(int w, int h, Function f, int bd = -1)
@@ -117,9 +121,6 @@
protected:
// Handle blocks up to 4 blocks 64x64 with stride up to 128
// crbug.com/webm/1660
- // const[expr] should be sufficient for DECLARE_ALIGNED but early
- // implementations of c++11 appear to have some issues with it.
- enum { kDataAlignment = 32 };
static const int kDataBlockSize = 64 * 128;
static const int kDataBufferSize = 4 * kDataBlockSize;
--- a/test/simple_encode_test.cc
+++ b/test/simple_encode_test.cc
@@ -13,6 +13,7 @@
#include <string>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/video_source.h"
#include "vp9/simple_encode.h"
namespace vp9 {
@@ -36,7 +37,8 @@
const int frame_rate_den_ = 1;
const int target_bitrate_ = 1000;
const int num_frames_ = 17;
- const std::string in_file_path_str_ = "bus_352x288_420_f20_b8.yuv";
+ const std::string in_file_path_str_ =
+ libvpx_test::GetDataPath() + "/bus_352x288_420_f20_b8.yuv";
};
TEST_F(SimpleEncodeTest, ComputeFirstPassStats) {
--- a/test/svc_datarate_test.cc
+++ b/test/svc_datarate_test.cc
@@ -84,6 +84,7 @@
prev_frame_width[i] = 320;
prev_frame_height[i] = 240;
}
+ ksvc_flex_noupd_tlenh_ = false;
}
virtual void BeginPassHook(unsigned int /*pass*/) {}
@@ -91,9 +92,10 @@
// bypass/flexible mode. The pattern corresponds to the pattern
// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
// non-flexible mode, except that we disable inter-layer prediction.
- void set_frame_flags_bypass_mode(
- int tl, int num_spatial_layers, int is_key_frame,
- vpx_svc_ref_frame_config_t *ref_frame_config) {
+ void set_frame_flags_bypass_mode(int tl, int num_spatial_layers,
+ int is_key_frame,
+ vpx_svc_ref_frame_config_t *ref_frame_config,
+ int noupdate_tlenh) {
for (int sl = 0; sl < num_spatial_layers; ++sl)
ref_frame_config->update_buffer_slot[sl] = 0;
@@ -151,9 +153,12 @@
ref_frame_config->reference_last[sl] = 1;
ref_frame_config->reference_golden[sl] = 0;
ref_frame_config->reference_alt_ref[sl] = 0;
- ref_frame_config->update_buffer_slot[sl] |=
- 1 << ref_frame_config->alt_fb_idx[sl];
+ // Non-reference frame on the top temporal, top spatial layer.
+ ref_frame_config->update_buffer_slot[sl] = 0;
}
+ // Force no update on all spatial layers for temporal enhancement layer
+ // frames.
+ if (noupdate_tlenh) ref_frame_config->update_buffer_slot[sl] = 0;
}
}
}
@@ -244,6 +249,22 @@
}
}
+ if (ksvc_flex_noupd_tlenh_) {
+ vpx_svc_layer_id_t layer_id;
+ layer_id.spatial_layer_id = 0;
+ layer_id.temporal_layer_id = (video->frame() % 2 != 0);
+ temporal_layer_id_ = layer_id.temporal_layer_id;
+ for (int i = 0; i < number_spatial_layers_; i++) {
+ layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_;
+ ref_frame_config.duration[i] = 1;
+ }
+ encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
+ set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
+ number_spatial_layers_, 0, &ref_frame_config,
+ 1);
+ encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);
+ }
+
if (update_pattern_ && video->frame() >= 100) {
vpx_svc_layer_id_t layer_id;
if (video->frame() == 100) {
@@ -254,11 +275,14 @@
layer_id.spatial_layer_id = 0;
layer_id.temporal_layer_id = (video->frame() % 2 != 0);
temporal_layer_id_ = layer_id.temporal_layer_id;
- for (int i = 0; i < number_spatial_layers_; i++)
+ for (int i = 0; i < number_spatial_layers_; i++) {
layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_;
+ ref_frame_config.duration[i] = 1;
+ }
encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
set_frame_flags_bypass_mode(layer_id.temporal_layer_id,
- number_spatial_layers_, 0, &ref_frame_config);
+ number_spatial_layers_, 0, &ref_frame_config,
+ 0);
encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config);
}
@@ -557,9 +581,14 @@
}
virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) {
- double mismatch_psnr = compute_psnr(img1, img2);
- mismatch_psnr_ += mismatch_psnr;
- ++mismatch_nframes_;
+ // TODO(marpan): Look into why an assert is triggered in compute_psnr
+ // for mismatch frames for the special test case: ksvc_flex_noupd_tlenh.
+ // Has to do with dropped frames in bypass/flexible svc mode.
+ if (!ksvc_flex_noupd_tlenh_) {
+ double mismatch_psnr = compute_psnr(img1, img2);
+ mismatch_psnr_ += mismatch_psnr;
+ ++mismatch_nframes_;
+ }
}
unsigned int GetMismatchFrames() { return mismatch_nframes_; }
@@ -604,6 +633,7 @@
int num_resize_down_;
unsigned int prev_frame_width[VPX_MAX_LAYERS];
unsigned int prev_frame_height[VPX_MAX_LAYERS];
+ bool ksvc_flex_noupd_tlenh_;
private:
virtual void SetConfig(const int num_temporal_layer) {
@@ -722,8 +752,6 @@
cfg_.g_threads = 1;
cfg_.rc_dropframe_thresh = 30;
cfg_.kf_max_dist = 9999;
- // Change SVC pattern on the fly.
- update_pattern_ = 1;
::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1,
0, 400);
top_sl_width_ = 640;
@@ -730,6 +758,8 @@
top_sl_height_ = 480;
cfg_.rc_target_bitrate = 800;
ResetModel();
+ // Change SVC pattern on the fly.
+ update_pattern_ = 1;
AssignLayerBitrates();
ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78,
@@ -1104,6 +1134,36 @@
// encoder will avoid loopfilter on these frames.
EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
+// 2 temporal layers, for KSVC in flexible mode with no update of reference
+// frames for all spatial layers on TL > 0 superframes.
+// Run HD clip with 4 threads.
+TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc3SL2TL4ThKSVCFlex) {
+ SetSvcConfig(3, 2);
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 500;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 0;
+ cfg_.rc_max_quantizer = 63;
+ cfg_.g_threads = 4;
+ cfg_.rc_dropframe_thresh = 30;
+ cfg_.kf_max_dist = 9999;
+ ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
+ top_sl_width_ = 1280;
+ top_sl_height_ = 720;
+ layer_framedrop_ = 0;
+ const int bitrates[3] = { 200, 400, 600 };
+ cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)];
+ ResetModel();
+ layer_framedrop_ = GET_PARAM(2);
+ AssignLayerBitrates();
+ ksvc_flex_noupd_tlenh_ = true;
+ cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.58,
+ 1.2);
}
// Params: speed setting, inter-layer prediction mode.
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -27,8 +27,6 @@
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv
-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rc_interface_test_one_layer
-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rc_interface_test_svc
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += bus_352x288_420_f20_b8.yuv
# Test vectors
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -869,5 +869,3 @@
518a0be998afece76d3df76047d51e256c591ff2 *invalid-bug-148271109.ivf
d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-bug-148271109.ivf.res
ad18ca16f0a249fb3b7c38de0d9b327fed273f96 *hantro_collage_w352h288_nv12.yuv
-03f827c0e36ff9a6e23c5cc11936924e4f1827ab *rc_interface_test_one_layer
-99e4f4c2961d46dc286db230090a39d78460b25d *rc_interface_test_svc
--- a/test/test.mk
+++ b/test/test.mk
@@ -193,10 +193,8 @@
endif
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
-ifneq (, $(filter yes, $(HAVE_SSE2) $(HAVE_AVX2)))
LIBVPX_TEST_SRCS-yes += vp9_denoiser_test.cc
endif
-endif
LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc
ifeq ($(CONFIG_VP9_ENCODER),yes)
@@ -215,7 +213,14 @@
TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc
TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c
-RC_INTERFACE_TEST_SRCS-$(CONFIG_VP9_ENCODER) := ratectrl_rtc_test.cc
+RC_INTERFACE_TEST_SRCS-yes := test_rc_interface.cc
+RC_INTERFACE_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ratectrl_rtc_test.cc
+RC_INTERFACE_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_ratectrl_rtc_test.cc
+RC_INTERFACE_TEST_SRCS-$(CONFIG_ENCODERS) += encode_test_driver.cc
+RC_INTERFACE_TEST_SRCS-$(CONFIG_ENCODERS) += encode_test_driver.h
+RC_INTERFACE_TEST_SRCS-yes += decode_test_driver.cc
+RC_INTERFACE_TEST_SRCS-yes += decode_test_driver.h
+RC_INTERFACE_TEST_SRCS-yes += codec_factory.h
endif # CONFIG_SHARED
--- /dev/null
+++ b/test/test_rc_interface.cc
@@ -1,0 +1,6 @@
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+int main(int argc, char **argv) {
+ ::testing::InitGoogleTest(&argc, argv);
+ return RUN_ALL_TESTS();
+}
--- a/test/vp8_denoiser_sse2_test.cc
+++ b/test/vp8_denoiser_sse2_test.cc
@@ -40,7 +40,12 @@
int increase_denoising_;
};
+// TODO(https://crbug.com/webm/1718): This test fails with gcc 8-10.
+#if defined(__GNUC__) && __GNUC__ >= 8
+TEST_P(VP8DenoiserTest, DISABLED_BitexactCheck) {
+#else
TEST_P(VP8DenoiserTest, BitexactCheck) {
+#endif
ACMRandom rnd(ACMRandom::DeterministicSeed());
const int count_test_block = 4000;
const int stride = 16;
@@ -87,7 +92,7 @@
// Check bitexactness.
for (int h = 0; h < 16; ++h) {
for (int w = 0; w < 16; ++w) {
- EXPECT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);
+ ASSERT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);
}
}
@@ -103,7 +108,7 @@
// Check bitexactness.
for (int h = 0; h < 16; ++h) {
for (int w = 0; w < 16; ++w) {
- EXPECT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);
+ ASSERT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]);
}
}
}
--- /dev/null
+++ b/test/vp8_ratectrl_rtc_test.cc
@@ -1,0 +1,343 @@
+/*
+ * Copyright (c) 2021 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <fstream> // NOLINT
+#include <string>
+
+#include "./vpx_config.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "vp8/vp8_ratectrl_rtc.h"
+#include "vpx/vpx_codec.h"
+#include "vpx_ports/bitops.h"
+
+namespace {
+
+struct Vp8RCTestVideo {
+ Vp8RCTestVideo() {}
+ Vp8RCTestVideo(const char *name_, int width_, int height_,
+ unsigned int frames_)
+ : name(name_), width(width_), height(height_), frames(frames_) {}
+
+ friend std::ostream &operator<<(std::ostream &os,
+ const Vp8RCTestVideo &video) {
+ os << video.name << " " << video.width << " " << video.height << " "
+ << video.frames;
+ return os;
+ }
+ const char *name;
+ int width;
+ int height;
+ unsigned int frames;
+};
+
+const Vp8RCTestVideo kVp8RCTestVectors[] = {
+ Vp8RCTestVideo("niklas_640_480_30.yuv", 640, 480, 470),
+ Vp8RCTestVideo("desktop_office1.1280_720-020.yuv", 1280, 720, 300),
+};
+
+class Vp8RcInterfaceTest
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<int, Vp8RCTestVideo> {
+ public:
+ Vp8RcInterfaceTest()
+ : EncoderTest(GET_PARAM(0)), key_interval_(3000), encoder_exit_(false) {}
+ virtual ~Vp8RcInterfaceTest() {}
+
+ protected:
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(::libvpx_test::kRealTime);
+ }
+
+ // From error_resilience_test.cc
+ int SetFrameFlags(int frame_num, int num_temp_layers) {
+ int frame_flags = 0;
+ if (num_temp_layers == 2) {
+ if (frame_num % 2 == 0) {
+ // Layer 0: predict from L and ARF, update L.
+ frame_flags =
+ VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+ } else {
+ // Layer 1: predict from L, G and ARF, and update G.
+ frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
+ VP8_EFLAG_NO_UPD_ENTROPY;
+ }
+ } else if (num_temp_layers == 3) {
+ if (frame_num % 4 == 0) {
+ // Layer 0: predict from L, update L.
+ frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
+ VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+ } else if ((frame_num - 2) % 4 == 0) {
+ // Layer 1: predict from L, G, update G.
+ frame_flags =
+ VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF;
+ } else if ((frame_num - 1) % 2 == 0) {
+ // Layer 2: predict from L, G, ARF; update ARF.
+ frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
+ }
+ }
+ return frame_flags;
+ }
+
+ int SetLayerId(int frame_num, int num_temp_layers) {
+ int layer_id = 0;
+ if (num_temp_layers == 2) {
+ if (frame_num % 2 == 0) {
+ layer_id = 0;
+ } else {
+ layer_id = 1;
+ }
+ } else if (num_temp_layers == 3) {
+ if (frame_num % 4 == 0) {
+ layer_id = 0;
+ } else if ((frame_num - 2) % 4 == 0) {
+ layer_id = 1;
+ } else if ((frame_num - 1) % 2 == 0) {
+ layer_id = 2;
+ }
+ }
+ return layer_id;
+ }
+
+ virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (rc_cfg_.ts_number_layers > 1) {
+ const int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
+ const int frame_flags =
+ SetFrameFlags(video->frame(), cfg_.ts_number_layers);
+ frame_params_.temporal_layer_id = layer_id;
+ if (video->frame() > 0) {
+ encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
+ encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
+ }
+ } else {
+ if (video->frame() == 0) {
+ encoder->Control(VP8E_SET_CPUUSED, -6);
+ encoder->Control(VP8E_SET_RTC_EXTERNAL_RATECTRL, 1);
+ encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 1000);
+ }
+ if (frame_params_.frame_type == INTER_FRAME) {
+ // Disable golden and alt-ref frame updates.
+ frame_flags_ |= VP8_EFLAG_NO_UPD_GF;
+ frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;
+ }
+ }
+ frame_params_.frame_type =
+ video->frame() % key_interval_ == 0 ? KEY_FRAME : INTER_FRAME;
+ encoder_exit_ = video->frame() == test_video_.frames;
+ }
+
+ virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
+ if (encoder_exit_) {
+ return;
+ }
+ int qp;
+ encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp);
+ rc_api_->ComputeQP(frame_params_);
+ ASSERT_EQ(rc_api_->GetQP(), qp);
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ rc_api_->PostEncodeUpdate(pkt->data.frame.sz);
+ }
+
+ void RunOneLayer() {
+ test_video_ = GET_PARAM(2);
+ target_bitrate_ = GET_PARAM(1);
+ if (test_video_.width == 1280 && target_bitrate_ == 200) return;
+ if (test_video_.width == 640 && target_bitrate_ == 1000) return;
+ SetConfig();
+ rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);
+ rc_api_->UpdateRateControl(rc_cfg_);
+
+ ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,
+ test_video_.height, 30, 1, 0,
+ test_video_.frames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ void RunPeriodicKey() {
+ test_video_ = GET_PARAM(2);
+ target_bitrate_ = GET_PARAM(1);
+ if (test_video_.width == 1280 && target_bitrate_ == 200) return;
+ if (test_video_.width == 640 && target_bitrate_ == 1000) return;
+ key_interval_ = 100;
+ SetConfig();
+ rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);
+ rc_api_->UpdateRateControl(rc_cfg_);
+
+ ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,
+ test_video_.height, 30, 1, 0,
+ test_video_.frames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ void RunTemporalLayers2TL() {
+ test_video_ = GET_PARAM(2);
+ target_bitrate_ = GET_PARAM(1);
+ if (test_video_.width == 1280 && target_bitrate_ == 200) return;
+ if (test_video_.width == 640 && target_bitrate_ == 1000) return;
+ SetConfigTemporalLayers(2);
+ rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);
+ rc_api_->UpdateRateControl(rc_cfg_);
+
+ ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,
+ test_video_.height, 30, 1, 0,
+ test_video_.frames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ void RunTemporalLayers3TL() {
+ test_video_ = GET_PARAM(2);
+ target_bitrate_ = GET_PARAM(1);
+ if (test_video_.width == 1280 && target_bitrate_ == 200) return;
+ if (test_video_.width == 640 && target_bitrate_ == 1000) return;
+ SetConfigTemporalLayers(3);
+ rc_api_ = libvpx::VP8RateControlRTC::Create(rc_cfg_);
+ rc_api_->UpdateRateControl(rc_cfg_);
+
+ ::libvpx_test::I420VideoSource video(test_video_.name, test_video_.width,
+ test_video_.height, 30, 1, 0,
+ test_video_.frames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ private:
+ void SetConfig() {
+ rc_cfg_.width = test_video_.width;
+ rc_cfg_.height = test_video_.height;
+ rc_cfg_.max_quantizer = 60;
+ rc_cfg_.min_quantizer = 2;
+ rc_cfg_.target_bandwidth = target_bitrate_;
+ rc_cfg_.buf_initial_sz = 600;
+ rc_cfg_.buf_optimal_sz = 600;
+ rc_cfg_.buf_sz = target_bitrate_;
+ rc_cfg_.undershoot_pct = 50;
+ rc_cfg_.overshoot_pct = 50;
+ rc_cfg_.max_intra_bitrate_pct = 1000;
+ rc_cfg_.framerate = 30.0;
+ rc_cfg_.layer_target_bitrate[0] = target_bitrate_;
+
+ // Encoder settings for ground truth.
+ cfg_.g_w = test_video_.width;
+ cfg_.g_h = test_video_.height;
+ cfg_.rc_undershoot_pct = 50;
+ cfg_.rc_overshoot_pct = 50;
+ cfg_.rc_buf_initial_sz = 600;
+ cfg_.rc_buf_optimal_sz = 600;
+ cfg_.rc_buf_sz = target_bitrate_;
+ cfg_.rc_dropframe_thresh = 0;
+ cfg_.rc_min_quantizer = 2;
+ cfg_.rc_max_quantizer = 60;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.g_error_resilient = 1;
+ cfg_.rc_target_bitrate = target_bitrate_;
+ cfg_.kf_min_dist = key_interval_;
+ cfg_.kf_max_dist = key_interval_;
+ }
+
+ void SetConfigTemporalLayers(int temporal_layers) {
+ rc_cfg_.width = test_video_.width;
+ rc_cfg_.height = test_video_.height;
+ rc_cfg_.max_quantizer = 60;
+ rc_cfg_.min_quantizer = 2;
+ rc_cfg_.target_bandwidth = target_bitrate_;
+ rc_cfg_.buf_initial_sz = 600;
+ rc_cfg_.buf_optimal_sz = 600;
+ rc_cfg_.buf_sz = target_bitrate_;
+ rc_cfg_.undershoot_pct = 50;
+ rc_cfg_.overshoot_pct = 50;
+ rc_cfg_.max_intra_bitrate_pct = 1000;
+ rc_cfg_.framerate = 30.0;
+ if (temporal_layers == 2) {
+ rc_cfg_.layer_target_bitrate[0] = 60 * target_bitrate_ / 100;
+ rc_cfg_.layer_target_bitrate[1] = target_bitrate_;
+ rc_cfg_.ts_rate_decimator[0] = 2;
+ rc_cfg_.ts_rate_decimator[1] = 1;
+ } else if (temporal_layers == 3) {
+ rc_cfg_.layer_target_bitrate[0] = 40 * target_bitrate_ / 100;
+ rc_cfg_.layer_target_bitrate[1] = 60 * target_bitrate_ / 100;
+ rc_cfg_.layer_target_bitrate[2] = target_bitrate_;
+ rc_cfg_.ts_rate_decimator[0] = 4;
+ rc_cfg_.ts_rate_decimator[1] = 2;
+ rc_cfg_.ts_rate_decimator[2] = 1;
+ }
+
+ rc_cfg_.ts_number_layers = temporal_layers;
+
+ // Encoder settings for ground truth.
+ cfg_.g_w = test_video_.width;
+ cfg_.g_h = test_video_.height;
+ cfg_.rc_undershoot_pct = 50;
+ cfg_.rc_overshoot_pct = 50;
+ cfg_.rc_buf_initial_sz = 600;
+ cfg_.rc_buf_optimal_sz = 600;
+ cfg_.rc_buf_sz = target_bitrate_;
+ cfg_.rc_dropframe_thresh = 0;
+ cfg_.rc_min_quantizer = 2;
+ cfg_.rc_max_quantizer = 60;
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.g_error_resilient = 1;
+ cfg_.rc_target_bitrate = target_bitrate_;
+ cfg_.kf_min_dist = key_interval_;
+ cfg_.kf_max_dist = key_interval_;
+ // Temporal layers only (no spatial layers), CBR mode.
+ cfg_.ss_number_layers = 1;
+ cfg_.ts_number_layers = temporal_layers;
+ if (temporal_layers == 2) {
+ cfg_.ts_rate_decimator[0] = 2;
+ cfg_.ts_rate_decimator[1] = 1;
+ cfg_.ts_periodicity = 2;
+ cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+ } else if (temporal_layers == 3) {
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.ts_periodicity = 4;
+ cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+ cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+ cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+ }
+ }
+
+ std::unique_ptr<libvpx::VP8RateControlRTC> rc_api_;
+ libvpx::VP8RateControlRtcConfig rc_cfg_;
+ int key_interval_;
+ int target_bitrate_;
+ Vp8RCTestVideo test_video_;
+ libvpx::VP8FrameParamsQpRTC frame_params_;
+ bool encoder_exit_;
+};
+
+TEST_P(Vp8RcInterfaceTest, OneLayer) { RunOneLayer(); }
+
+TEST_P(Vp8RcInterfaceTest, OneLayerPeriodicKey) { RunPeriodicKey(); }
+
+TEST_P(Vp8RcInterfaceTest, TemporalLayers2TL) { RunTemporalLayers2TL(); }
+
+TEST_P(Vp8RcInterfaceTest, TemporalLayers3TL) { RunTemporalLayers3TL(); }
+
+VP8_INSTANTIATE_TEST_SUITE(Vp8RcInterfaceTest,
+ ::testing::Values(200, 400, 1000),
+ ::testing::ValuesIn(kVp8RCTestVectors));
+
+} // namespace
--- a/test/vp9_end_to_end_test.cc
+++ b/test/vp9_end_to_end_test.cc
@@ -31,7 +31,7 @@
{ 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 },
{ 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 },
{ 32.0, 33.0, 33.0, 33.0, 33.0 }, { 28.0, 32.0, 32.0, 32.0, 32.0 },
- { 28.5, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 },
+ { 28.4, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 },
};
typedef struct {
@@ -342,7 +342,7 @@
VP9_INSTANTIATE_TEST_SUITE(EndToEndNV12,
::testing::Values(::libvpx_test::kRealTime),
::testing::ValuesIn(kTestVectorsNv12),
- ::testing::ValuesIn({ 6, 7, 8 }));
+ ::testing::Values(6, 7, 8));
VP9_INSTANTIATE_TEST_SUITE(EndToEndTestAdaptiveRDThresh,
::testing::Values(5, 6, 7), ::testing::Values(8, 9));
--- a/test/vp9_ext_ratectrl_test.cc
+++ b/test/vp9_ext_ratectrl_test.cc
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <cstdint>
#include <new>
#include "test/codec_factory.h"
@@ -20,7 +21,7 @@
namespace {
constexpr int kModelMagicNumber = 51396;
-constexpr unsigned int PrivMagicNumber = 5566;
+constexpr uintptr_t PrivMagicNumber = 5566;
constexpr int kFrameNum = 5;
constexpr int kLosslessCodingIndex = 2;
@@ -73,6 +74,7 @@
EXPECT_EQ(encode_frame_info->coding_index, toy_rate_ctrl->coding_index);
if (encode_frame_info->coding_index == 0) {
+ EXPECT_EQ(encode_frame_info->show_index, 0);
EXPECT_EQ(encode_frame_info->gop_index, 0);
EXPECT_EQ(encode_frame_info->frame_type, 0 /*kFrameTypeKey*/);
EXPECT_EQ(encode_frame_info->ref_frame_valid_list[0],
@@ -84,6 +86,7 @@
}
if (encode_frame_info->coding_index == 1) {
+ EXPECT_EQ(encode_frame_info->show_index, 4);
EXPECT_EQ(encode_frame_info->gop_index, 1);
EXPECT_EQ(encode_frame_info->frame_type, 2 /*kFrameTypeAltRef*/);
EXPECT_EQ(encode_frame_info->ref_frame_valid_list[0],
@@ -104,6 +107,7 @@
}
if (encode_frame_info->coding_index == 5) {
+ EXPECT_EQ(encode_frame_info->show_index, 4);
EXPECT_EQ(encode_frame_info->gop_index, 0);
EXPECT_EQ(encode_frame_info->frame_type, 3 /*kFrameTypeOverlay*/);
EXPECT_EQ(encode_frame_info->ref_frame_valid_list[0],
@@ -125,6 +129,7 @@
} else {
frame_decision->q_index = 100;
}
+ frame_decision->max_frame_size = 0;
return VPX_RC_OK;
}
@@ -139,6 +144,11 @@
EXPECT_EQ(encode_frame_result->pixel_count, ref_pixel_count);
if (toy_rate_ctrl->coding_index == kLosslessCodingIndex) {
EXPECT_EQ(encode_frame_result->sse, 0);
+ }
+ if (toy_rate_ctrl->coding_index == kLosslessCodingIndex) {
+ EXPECT_EQ(encode_frame_result->actual_encoding_qindex, 0);
+ } else {
+ EXPECT_EQ(encode_frame_result->actual_encoding_qindex, 100);
}
return VPX_RC_OK;
}
--- /dev/null
+++ b/test/vp9_ratectrl_rtc_test.cc
@@ -1,0 +1,373 @@
+/*
+ * Copyright (c) 2020 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "vp9/ratectrl_rtc.h"
+
+#include <fstream> // NOLINT
+#include <string>
+
+#include "./vpx_config.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+#include "test/video_source.h"
+#include "vpx/vpx_codec.h"
+#include "vpx_ports/bitops.h"
+
+namespace {
+
+const size_t kNumFrames = 300;
+
+const int kTemporalId[4] = { 0, 2, 1, 2 };
+
+class RcInterfaceTest
+ : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWith2Params<int, vpx_rc_mode> {
+ public:
+ RcInterfaceTest()
+ : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)), key_interval_(3000),
+ encoder_exit_(false) {}
+
+ virtual ~RcInterfaceTest() {}
+
+ protected:
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(::libvpx_test::kRealTime);
+ }
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ libvpx_test::Encoder *encoder) {
+ if (video->frame() == 0) {
+ encoder->Control(VP8E_SET_CPUUSED, 7);
+ encoder->Control(VP9E_SET_AQ_MODE, aq_mode_);
+ encoder->Control(VP9E_SET_TUNE_CONTENT, 0);
+ encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 1000);
+ encoder->Control(VP9E_SET_RTC_EXTERNAL_RATECTRL, 1);
+ }
+ frame_params_.frame_type =
+ video->frame() % key_interval_ == 0 ? KEY_FRAME : INTER_FRAME;
+ if (rc_cfg_.rc_mode == VPX_CBR && frame_params_.frame_type == INTER_FRAME) {
+ // Disable golden frame update.
+ frame_flags_ |= VP8_EFLAG_NO_UPD_GF;
+ frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;
+ }
+ encoder_exit_ = video->frame() == kNumFrames;
+ }
+
+ virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
+ if (encoder_exit_) {
+ return;
+ }
+ int loopfilter_level, qp;
+ encoder->Control(VP9E_GET_LOOPFILTER_LEVEL, &loopfilter_level);
+ encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp);
+ rc_api_->ComputeQP(frame_params_);
+ ASSERT_EQ(rc_api_->GetQP(), qp);
+ ASSERT_EQ(rc_api_->GetLoopfilterLevel(), loopfilter_level);
+ }
+
+ virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+ rc_api_->PostEncodeUpdate(pkt->data.frame.sz);
+ }
+
+ void RunOneLayer() {
+ SetConfig(GET_PARAM(2));
+ rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);
+ frame_params_.spatial_layer_id = 0;
+ frame_params_.temporal_layer_id = 0;
+
+ ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv",
+ 1280, 720, 30, 1, 0, kNumFrames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ void RunOneLayerVBRPeriodicKey() {
+ if (GET_PARAM(2) != VPX_VBR) return;
+ key_interval_ = 100;
+ SetConfig(VPX_VBR);
+ rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);
+ frame_params_.spatial_layer_id = 0;
+ frame_params_.temporal_layer_id = 0;
+
+ ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv",
+ 1280, 720, 30, 1, 0, kNumFrames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ private:
+ void SetConfig(vpx_rc_mode rc_mode) {
+ rc_cfg_.width = 1280;
+ rc_cfg_.height = 720;
+ rc_cfg_.max_quantizer = 52;
+ rc_cfg_.min_quantizer = 2;
+ rc_cfg_.target_bandwidth = 1000;
+ rc_cfg_.buf_initial_sz = 600;
+ rc_cfg_.buf_optimal_sz = 600;
+ rc_cfg_.buf_sz = 1000;
+ rc_cfg_.undershoot_pct = 50;
+ rc_cfg_.overshoot_pct = 50;
+ rc_cfg_.max_intra_bitrate_pct = 1000;
+ rc_cfg_.framerate = 30.0;
+ rc_cfg_.ss_number_layers = 1;
+ rc_cfg_.ts_number_layers = 1;
+ rc_cfg_.scaling_factor_num[0] = 1;
+ rc_cfg_.scaling_factor_den[0] = 1;
+ rc_cfg_.layer_target_bitrate[0] = 1000;
+ rc_cfg_.max_quantizers[0] = 52;
+ rc_cfg_.min_quantizers[0] = 2;
+ rc_cfg_.rc_mode = rc_mode;
+ rc_cfg_.aq_mode = aq_mode_;
+
+ // Encoder settings for ground truth.
+ cfg_.g_w = 1280;
+ cfg_.g_h = 720;
+ cfg_.rc_undershoot_pct = 50;
+ cfg_.rc_overshoot_pct = 50;
+ cfg_.rc_buf_initial_sz = 600;
+ cfg_.rc_buf_optimal_sz = 600;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_dropframe_thresh = 0;
+ cfg_.rc_min_quantizer = 2;
+ cfg_.rc_max_quantizer = 52;
+ cfg_.rc_end_usage = rc_mode;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.g_error_resilient = 0;
+ cfg_.rc_target_bitrate = 1000;
+ cfg_.kf_min_dist = key_interval_;
+ cfg_.kf_max_dist = key_interval_;
+ }
+
+ std::unique_ptr<libvpx::VP9RateControlRTC> rc_api_;
+ libvpx::VP9RateControlRtcConfig rc_cfg_;
+ int aq_mode_;
+ int key_interval_;
+ libvpx::VP9FrameParamsQpRTC frame_params_;
+ bool encoder_exit_;
+};
+
+class RcInterfaceSvcTest : public ::libvpx_test::EncoderTest,
+ public ::libvpx_test::CodecTestWithParam<int> {
+ public:
+ RcInterfaceSvcTest() : EncoderTest(GET_PARAM(0)), aq_mode_(GET_PARAM(1)) {}
+ virtual ~RcInterfaceSvcTest() {}
+
+ protected:
+ virtual void SetUp() {
+ InitializeConfig();
+ SetMode(::libvpx_test::kRealTime);
+ }
+
+ virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+ ::libvpx_test::Encoder *encoder) {
+ if (video->frame() == 0) {
+ encoder->Control(VP8E_SET_CPUUSED, 7);
+ encoder->Control(VP9E_SET_AQ_MODE, aq_mode_);
+ encoder->Control(VP9E_SET_TUNE_CONTENT, 0);
+ encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 900);
+ encoder->Control(VP9E_SET_RTC_EXTERNAL_RATECTRL, 1);
+ encoder->Control(VP9E_SET_SVC, 1);
+ encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
+ }
+
+ frame_params_.frame_type = video->frame() == 0 ? KEY_FRAME : INTER_FRAME;
+ if (rc_cfg_.rc_mode == VPX_CBR && frame_params_.frame_type == INTER_FRAME) {
+ // Disable golden frame update.
+ frame_flags_ |= VP8_EFLAG_NO_UPD_GF;
+ frame_flags_ |= VP8_EFLAG_NO_UPD_ARF;
+ }
+ encoder_exit_ = video->frame() == kNumFrames;
+ current_superframe_ = video->frame();
+ }
+
+ virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder) {
+ ::libvpx_test::CxDataIterator iter = encoder->GetCxData();
+ while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
+ ParseSuperframeSizes(static_cast<const uint8_t *>(pkt->data.frame.buf),
+ pkt->data.frame.sz);
+ for (int sl = 0; sl < rc_cfg_.ss_number_layers; sl++) {
+ frame_params_.spatial_layer_id = sl;
+ frame_params_.temporal_layer_id = kTemporalId[current_superframe_ % 4];
+ rc_api_->ComputeQP(frame_params_);
+ frame_params_.frame_type = INTER_FRAME;
+ rc_api_->PostEncodeUpdate(sizes_[sl]);
+ }
+ }
+ if (!encoder_exit_) {
+ int loopfilter_level, qp;
+ encoder->Control(VP9E_GET_LOOPFILTER_LEVEL, &loopfilter_level);
+ encoder->Control(VP8E_GET_LAST_QUANTIZER, &qp);
+ ASSERT_EQ(rc_api_->GetQP(), qp);
+ ASSERT_EQ(rc_api_->GetLoopfilterLevel(), loopfilter_level);
+ }
+ }
+ // This method needs to be overridden because non-reference frames are
+ // expected to be mismatched frames as the encoder will avoid loopfilter on
+ // these frames.
+ virtual void MismatchHook(const vpx_image_t * /*img1*/,
+ const vpx_image_t * /*img2*/) {}
+
+ void RunSvc() {
+ SetConfigSvc();
+ rc_api_ = libvpx::VP9RateControlRTC::Create(rc_cfg_);
+ SetEncoderSvc();
+
+ ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv",
+ 1280, 720, 30, 1, 0, kNumFrames);
+
+ ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+ }
+
+ private:
+ // Parses the trailing superframe index of |data| into sizes_ (if present).
+ vpx_codec_err_t ParseSuperframeSizes(const uint8_t *data, size_t data_sz) {
+   if (data_sz == 0) return VPX_CODEC_OK;  // Nothing to inspect.
+   const uint8_t marker = data[data_sz - 1];
+   // Only chunks whose last byte is 0b110xxxxx carry a superframe index.
+   if ((marker & 0xe0) != 0xc0) return VPX_CODEC_OK;
+   const uint32_t frames = (marker & 0x7) + 1;
+   const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+   const size_t index_sz = 2 + mag * frames;
+   // This chunk is marked as having a superframe index but doesn't have
+   // enough data for it, thus it's an invalid superframe index.
+   if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
+   // The marker byte must also open the index; otherwise the chunk is invalid.
+   if (data[data_sz - index_sz] != marker) return VPX_CODEC_CORRUPT_FRAME;
+   const uint8_t *x = &data[data_sz - index_sz + 1];
+   for (uint32_t i = 0; i < frames; ++i) {
+     uint32_t this_sz = 0;
+     // Each size is |mag| bytes, least significant first. Widen the byte
+     // before shifting: shifting a promoted int by 24 bits can overflow.
+     for (uint32_t j = 0; j < mag; ++j) {
+       this_sz |= static_cast<uint32_t>(*x++) << (j * 8);
+     }
+     sizes_[i] = this_sz;
+   }
+   return VPX_CODEC_OK;
+ }
+
+ void SetEncoderSvc() {
+ cfg_.ss_number_layers = 3;
+ cfg_.ts_number_layers = 3;
+ cfg_.g_timebase.num = 1;
+ cfg_.g_timebase.den = 30;
+ svc_params_.scaling_factor_num[0] = 72;
+ svc_params_.scaling_factor_den[0] = 288;
+ svc_params_.scaling_factor_num[1] = 144;
+ svc_params_.scaling_factor_den[1] = 288;
+ svc_params_.scaling_factor_num[2] = 288;
+ svc_params_.scaling_factor_den[2] = 288;
+ for (int i = 0; i < VPX_MAX_LAYERS; ++i) {
+ svc_params_.max_quantizers[i] = 56;
+ svc_params_.min_quantizers[i] = 2;
+ svc_params_.speed_per_layer[i] = 7;
+ }
+ cfg_.rc_end_usage = VPX_CBR;
+ cfg_.g_lag_in_frames = 0;
+ cfg_.g_error_resilient = 0;
+ // 3 temporal layers
+ cfg_.ts_rate_decimator[0] = 4;
+ cfg_.ts_rate_decimator[1] = 2;
+ cfg_.ts_rate_decimator[2] = 1;
+ cfg_.temporal_layering_mode = 3;
+
+ cfg_.rc_buf_initial_sz = 500;
+ cfg_.rc_buf_optimal_sz = 600;
+ cfg_.rc_buf_sz = 1000;
+ cfg_.rc_min_quantizer = 2;
+ cfg_.rc_max_quantizer = 56;
+ cfg_.g_threads = 1;
+ cfg_.kf_max_dist = 9999;
+ cfg_.rc_target_bitrate = 1600;
+ cfg_.rc_overshoot_pct = 50;
+ cfg_.rc_undershoot_pct = 50;
+
+ cfg_.layer_target_bitrate[0] = 100;
+ cfg_.layer_target_bitrate[1] = 140;
+ cfg_.layer_target_bitrate[2] = 200;
+ cfg_.layer_target_bitrate[3] = 250;
+ cfg_.layer_target_bitrate[4] = 350;
+ cfg_.layer_target_bitrate[5] = 500;
+ cfg_.layer_target_bitrate[6] = 450;
+ cfg_.layer_target_bitrate[7] = 630;
+ cfg_.layer_target_bitrate[8] = 900;
+ }
+
+ void SetConfigSvc() {
+ rc_cfg_.width = 1280;
+ rc_cfg_.height = 720;
+ rc_cfg_.max_quantizer = 56;
+ rc_cfg_.min_quantizer = 2;
+ rc_cfg_.target_bandwidth = 1600;
+ rc_cfg_.buf_initial_sz = 500;
+ rc_cfg_.buf_optimal_sz = 600;
+ rc_cfg_.buf_sz = 1000;
+ rc_cfg_.undershoot_pct = 50;
+ rc_cfg_.overshoot_pct = 50;
+ rc_cfg_.max_intra_bitrate_pct = 900;
+ rc_cfg_.framerate = 30.0;
+ rc_cfg_.ss_number_layers = 3;
+ rc_cfg_.ts_number_layers = 3;
+ rc_cfg_.rc_mode = VPX_CBR;
+ rc_cfg_.aq_mode = aq_mode_;
+
+ rc_cfg_.scaling_factor_num[0] = 1;
+ rc_cfg_.scaling_factor_den[0] = 4;
+ rc_cfg_.scaling_factor_num[1] = 2;
+ rc_cfg_.scaling_factor_den[1] = 4;
+ rc_cfg_.scaling_factor_num[2] = 4;
+ rc_cfg_.scaling_factor_den[2] = 4;
+
+ rc_cfg_.ts_rate_decimator[0] = 4;
+ rc_cfg_.ts_rate_decimator[1] = 2;
+ rc_cfg_.ts_rate_decimator[2] = 1;
+
+ rc_cfg_.layer_target_bitrate[0] = 100;
+ rc_cfg_.layer_target_bitrate[1] = 140;
+ rc_cfg_.layer_target_bitrate[2] = 200;
+ rc_cfg_.layer_target_bitrate[3] = 250;
+ rc_cfg_.layer_target_bitrate[4] = 350;
+ rc_cfg_.layer_target_bitrate[5] = 500;
+ rc_cfg_.layer_target_bitrate[6] = 450;
+ rc_cfg_.layer_target_bitrate[7] = 630;
+ rc_cfg_.layer_target_bitrate[8] = 900;
+
+ for (int sl = 0; sl < rc_cfg_.ss_number_layers; ++sl) {
+ for (int tl = 0; tl < rc_cfg_.ts_number_layers; ++tl) {
+ const int i = sl * rc_cfg_.ts_number_layers + tl;
+ rc_cfg_.max_quantizers[i] = 56;
+ rc_cfg_.min_quantizers[i] = 2;
+ }
+ }
+ }
+
+ int aq_mode_;
+ std::unique_ptr<libvpx::VP9RateControlRTC> rc_api_;
+ libvpx::VP9RateControlRtcConfig rc_cfg_;
+ vpx_svc_extra_cfg_t svc_params_;
+ libvpx::VP9FrameParamsQpRTC frame_params_;
+ bool encoder_exit_;
+ int current_superframe_;
+ uint32_t sizes_[8];
+};
+
+TEST_P(RcInterfaceTest, OneLayer) { RunOneLayer(); }
+
+TEST_P(RcInterfaceTest, OneLayerVBRPeriodicKey) { RunOneLayerVBRPeriodicKey(); }
+
+TEST_P(RcInterfaceSvcTest, Svc) { RunSvc(); }
+
+VP9_INSTANTIATE_TEST_SUITE(RcInterfaceTest, ::testing::Values(0, 3),
+ ::testing::Values(VPX_CBR, VPX_VBR));
+VP9_INSTANTIATE_TEST_SUITE(RcInterfaceSvcTest, ::testing::Values(0, 3));
+} // namespace
--- a/test/y4m_test.cc
+++ b/test/y4m_test.cc
@@ -188,4 +188,55 @@
INSTANTIATE_TEST_SUITE_P(C, Y4mVideoWriteTest,
::testing::ValuesIn(kY4mTestVectors));
+
+static const char kY4MRegularHeader[] =
+ "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG\n"
+ "FRAME\n"
+ "012345678912345601230123";
+
+TEST(Y4MHeaderTest, RegularHeader) {
+  libvpx_test::TempOutFile f;
+  fwrite(kY4MRegularHeader, 1, sizeof(kY4MRegularHeader), f.file());
+  fflush(f.file());
+  ASSERT_EQ(0, fseek(f.file(), 0, SEEK_SET));  // Rewind for the reader.
+
+  y4m_input y4m;
+  ASSERT_EQ(y4m_input_open(&y4m, f.file(), /*skip_buffer=*/NULL,
+                           /*num_skip=*/0, /*only_420=*/0),
+            0);  // Fatal: |y4m| is uninitialized unless this call filled it.
+  EXPECT_EQ(y4m.pic_w, 4);
+  EXPECT_EQ(y4m.pic_h, 4);
+  EXPECT_EQ(y4m.fps_n, 30);
+  EXPECT_EQ(y4m.fps_d, 1);
+  EXPECT_EQ(y4m.interlace, 'p');
+  EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0);
+  y4m_input_close(&y4m);
+}
+
+// Testing that headers over 100 characters can be parsed.
+static const char kY4MLongHeader[] =
+ "YUV4MPEG2 W4 H4 F30:1 Ip A0:0 C420jpeg XYSCSS=420JPEG "
+ "XCOLORRANGE=LIMITED XSOME_UNKNOWN_METADATA XOTHER_UNKNOWN_METADATA\n"
+ "FRAME\n"
+ "012345678912345601230123";
+
+TEST(Y4MHeaderTest, LongHeader) {
+  libvpx_test::TempOutFile f;
+  fwrite(kY4MLongHeader, 1, sizeof(kY4MLongHeader), f.file());
+  fflush(f.file());
+  ASSERT_EQ(fseek(f.file(), 0, SEEK_SET), 0);  // Rewind for the reader.
+
+  y4m_input y4m;
+  ASSERT_EQ(y4m_input_open(&y4m, f.file(), /*skip_buffer=*/NULL,
+                           /*num_skip=*/0, /*only_420=*/0),
+            0);  // Fatal: |y4m| is uninitialized unless this call filled it.
+  EXPECT_EQ(y4m.pic_w, 4);
+  EXPECT_EQ(y4m.pic_h, 4);
+  EXPECT_EQ(y4m.fps_n, 30);
+  EXPECT_EQ(y4m.fps_d, 1);
+  EXPECT_EQ(y4m.interlace, 'p');
+  EXPECT_EQ(strcmp("420jpeg", y4m.chroma_type), 0);
+  y4m_input_close(&y4m);
+}
+
} // namespace
--- a/third_party/libwebm/Android.mk
+++ b/third_party/libwebm/Android.mk
@@ -3,7 +3,7 @@
include $(CLEAR_VARS)
LOCAL_MODULE:= libwebm
LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS
-LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=c++11
+LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=gnu++11
LOCAL_C_INCLUDES:= $(LOCAL_PATH)
LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH)
--- a/tools.mk
+++ b/tools.mk
@@ -79,6 +79,7 @@
--ver=$$(CONFIG_VS_VERSION)\
--proj-guid=$$($$(@:.$(VCPROJ_SFX)=).GUID)\
--src-path-bare="$(SRC_PATH_BARE)" \
+ --as=$$(AS) \
$$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \
--out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \
$$(INTERNAL_LDFLAGS) $$(LDFLAGS) $$^
--- a/tools_common.h
+++ b/tools_common.h
@@ -110,6 +110,8 @@
#if defined(__GNUC__)
#define VPX_NO_RETURN __attribute__((noreturn))
+#elif defined(_MSC_VER)
+#define VPX_NO_RETURN __declspec(noreturn)
#else
#define VPX_NO_RETURN
#endif
@@ -117,14 +119,14 @@
/* Sets a stdio stream into binary mode */
FILE *set_binary_mode(FILE *stream);
-void die(const char *fmt, ...) VPX_NO_RETURN;
-void fatal(const char *fmt, ...) VPX_NO_RETURN;
+VPX_NO_RETURN void die(const char *fmt, ...);
+VPX_NO_RETURN void fatal(const char *fmt, ...);
void warn(const char *fmt, ...);
-void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;
+VPX_NO_RETURN void die_codec(vpx_codec_ctx_t *ctx, const char *s);
/* The tool including this file must define usage_exit() */
-void usage_exit(void) VPX_NO_RETURN;
+VPX_NO_RETURN void usage_exit(void);
#undef VPX_NO_RETURN
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -10,7 +10,7 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
-#if !defined(WIN32) && CONFIG_OS_SUPPORT == 1
+#if !defined(_WIN32) && CONFIG_OS_SUPPORT == 1
#include <unistd.h>
#endif
#include "onyxd_int.h"
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -866,7 +866,6 @@
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_writer *const w = cpi->bc;
#endif
- int savings = 0;
vpx_clear_system_state();
@@ -940,8 +939,6 @@
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
vp8_write_literal(w, newp, 8);
#endif
-
- savings += s;
}
} while (++t < ENTROPY_NODES);
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -183,7 +183,7 @@
extern FILE *vpxlogc;
#endif
-static void save_layer_context(VP8_COMP *cpi) {
+void vp8_save_layer_context(VP8_COMP *cpi) {
LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer];
/* Save layer dependent coding state */
@@ -222,7 +222,7 @@
sizeof(cpi->mb.count_mb_ref_frame_usage));
}
-static void restore_layer_context(VP8_COMP *cpi, const int layer) {
+void vp8_restore_layer_context(VP8_COMP *cpi, const int layer) {
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
/* Restore layer dependent coding state */
@@ -269,9 +269,9 @@
return (int)(llval * llnum / llden);
}
-static void init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,
- const int layer,
- double prev_layer_framerate) {
+void vp8_init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,
+ const int layer,
+ double prev_layer_framerate) {
LAYER_CONTEXT *lc = &cpi->layer_context[layer];
lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];
@@ -301,9 +301,9 @@
/* Work out the average size of a frame within this layer */
if (layer > 0) {
lc->avg_frame_size_for_layer =
- (int)((cpi->oxcf.target_bitrate[layer] -
- cpi->oxcf.target_bitrate[layer - 1]) *
- 1000 / (lc->framerate - prev_layer_framerate));
+ (int)round((cpi->oxcf.target_bitrate[layer] -
+ cpi->oxcf.target_bitrate[layer - 1]) *
+ 1000 / (lc->framerate - prev_layer_framerate));
}
lc->active_worst_quality = cpi->oxcf.worst_allowed_q;
@@ -336,12 +336,12 @@
// We need this to set the layer context for the new layers below.
if (prev_num_layers == 1) {
cpi->current_layer = 0;
- save_layer_context(cpi);
+ vp8_save_layer_context(cpi);
}
for (i = 0; i < curr_num_layers; ++i) {
LAYER_CONTEXT *lc = &cpi->layer_context[i];
if (i >= prev_num_layers) {
- init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
+ vp8_init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
}
// The initial buffer levels are set based on their starting levels.
// We could set the buffer levels based on the previous state (normalized
@@ -356,7 +356,7 @@
// state (to smooth-out quality dips/rate fluctuation at transition)?
// We need to treat the 1 layer case separately: oxcf.target_bitrate[i]
- // is not set for 1 layer, and the restore_layer_context/save_context()
+ // is not set for 1 layer, and the vp8_restore_layer_context/save_context()
// are not called in the encoding loop, so we need to call it here to
// pass the layer context state to |cpi|.
if (curr_num_layers == 1) {
@@ -364,7 +364,7 @@
lc->buffer_level =
cpi->oxcf.starting_buffer_level_in_ms * lc->target_bandwidth / 1000;
lc->bits_off_target = lc->buffer_level;
- restore_layer_context(cpi, 0);
+ vp8_restore_layer_context(cpi, 0);
}
prev_layer_framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[i];
}
@@ -1274,7 +1274,7 @@
cpi->framerate = framerate;
cpi->output_framerate = framerate;
cpi->per_frame_bandwidth =
- (int)(cpi->oxcf.target_bandwidth / cpi->output_framerate);
+ (int)round(cpi->oxcf.target_bandwidth / cpi->output_framerate);
cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth *
cpi->oxcf.two_pass_vbrmin_section / 100);
@@ -1365,7 +1365,7 @@
double prev_layer_framerate = 0;
for (i = 0; i < cpi->oxcf.number_of_layers; ++i) {
- init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
+ vp8_init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
prev_layer_framerate =
cpi->output_framerate / cpi->oxcf.rate_decimator[i];
}
@@ -1382,7 +1382,7 @@
#endif
}
-static void update_layer_contexts(VP8_COMP *cpi) {
+void vp8_update_layer_contexts(VP8_COMP *cpi) {
VP8_CONFIG *oxcf = &cpi->oxcf;
/* Update snapshots of the layer contexts to reflect new parameters */
@@ -1417,8 +1417,8 @@
/* Work out the average size of a frame within this layer */
if (i > 0) {
lc->avg_frame_size_for_layer =
- (int)((oxcf->target_bitrate[i] - oxcf->target_bitrate[i - 1]) *
- 1000 / (lc->framerate - prev_layer_framerate));
+ (int)round((oxcf->target_bitrate[i] - oxcf->target_bitrate[i - 1]) *
+ 1000 / (lc->framerate - prev_layer_framerate));
}
prev_layer_framerate = lc->framerate;
@@ -1910,6 +1910,7 @@
cpi->force_maxqp = 0;
cpi->frames_since_last_drop_overshoot = 0;
+ cpi->rt_always_update_correction_factor = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
#if CONFIG_INTERNAL_STATS
@@ -3260,7 +3261,7 @@
#endif // !CONFIG_REALTIME_ONLY
default:
cpi->per_frame_bandwidth =
- (int)(cpi->target_bandwidth / cpi->output_framerate);
+ (int)round(cpi->target_bandwidth / cpi->output_framerate);
break;
}
@@ -3480,7 +3481,7 @@
* Note that dropping a key frame can be problematic if spatial
* resampling is also active
*/
- if (cpi->decimation_factor > 0) {
+ if (cpi->decimation_factor > 0 && cpi->drop_frames_allowed) {
switch (cpi->decimation_factor) {
case 1:
cpi->per_frame_bandwidth = cpi->per_frame_bandwidth * 3 / 2;
@@ -4016,7 +4017,8 @@
if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1;
/* Are we are overshooting and up against the limit of active max Q. */
- if (((cpi->pass != 2) ||
+ if (!cpi->rt_always_update_correction_factor &&
+ ((cpi->pass != 2) ||
(cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) &&
(Q == cpi->active_worst_quality) &&
(cpi->active_worst_quality < cpi->worst_quality) &&
@@ -4514,10 +4516,10 @@
cpi->bits_off_target = cpi->oxcf.maximum_buffer_size;
}
- // If the frame dropper is not enabled, don't let the buffer level go below
- // some threshold, given here by -|maximum_buffer_size|. For now we only do
- // this for screen content input.
- if (cpi->drop_frames_allowed == 0 && cpi->oxcf.screen_content_mode &&
+ // Don't let the buffer level go below some threshold, given here
+ // by -|maximum_buffer_size|. For now we only do this for
+ // screen content input.
+ if (cpi->oxcf.screen_content_mode &&
cpi->bits_off_target < -cpi->oxcf.maximum_buffer_size) {
cpi->bits_off_target = -cpi->oxcf.maximum_buffer_size;
}
@@ -4552,8 +4554,8 @@
for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; ++i) {
LAYER_CONTEXT *lc = &cpi->layer_context[i];
- int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate -
- cpi->projected_frame_size);
+ int bits_off_for_this_layer = (int)round(
+ lc->target_bandwidth / lc->framerate - cpi->projected_frame_size);
lc->bits_off_target += bits_off_for_this_layer;
@@ -4919,6 +4921,8 @@
this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen;
last_duration = cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen;
+ // Cap this to avoid overflow of (this_duration - last_duration) * 10
+ this_duration = VPXMIN(this_duration, INT64_MAX / 10);
/* do a step update if the duration changes by 10% */
if (last_duration) {
step = (int)(((this_duration - last_duration) * 10 / last_duration));
@@ -4988,7 +4992,7 @@
if (cpi->oxcf.number_of_layers > 1) {
int layer;
- update_layer_contexts(cpi);
+ vp8_update_layer_contexts(cpi);
/* Restore layer specific context & set frame rate */
if (cpi->temporal_layer_id >= 0) {
@@ -4998,7 +5002,7 @@
cpi->oxcf
.layer_id[cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
}
- restore_layer_context(cpi, layer);
+ vp8_restore_layer_context(cpi, layer);
vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
}
@@ -5129,7 +5133,7 @@
}
/* Save layer specific state */
- if (cpi->oxcf.number_of_layers > 1) save_layer_context(cpi);
+ if (cpi->oxcf.number_of_layers > 1) vp8_save_layer_context(cpi);
vpx_usec_timer_mark(&cmptimer);
cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer);
@@ -5316,17 +5320,13 @@
return -1;
}
- // Range check the delta Q values and convert the external Q range values
- // to internal ones.
- if ((abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) ||
- (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range)) {
- return -1;
- }
-
- // Range check the delta lf values
- if ((abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) ||
- (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range)) {
- return -1;
+ for (i = 0; i < MAX_MB_SEGMENTS; ++i) {
+ // Note abs() alone can't be used as the behavior of abs(INT_MIN) is
+ // undefined.
+ if (delta_q[i] > range || delta_q[i] < -range || delta_lf[i] > range ||
+ delta_lf[i] < -range) {
+ return -1;
+ }
}
// Also disable segmentation if no deltas are specified.
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -702,6 +702,10 @@
int use_roi_static_threshold;
int ext_refresh_frame_flags_pending;
+
+ // Always update correction factor used for rate control after each frame for
+ // realtime encoding.
+ int rt_always_update_correction_factor;
} VP8_COMP;
void vp8_initialize_enc(void);
@@ -708,6 +712,12 @@
void vp8_alloc_compressor_data(VP8_COMP *cpi);
int vp8_reverse_trans(int x);
+void vp8_init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf,
+ const int layer,
+ double prev_layer_framerate);
+void vp8_update_layer_contexts(VP8_COMP *cpi);
+void vp8_save_layer_context(VP8_COMP *cpi);
+void vp8_restore_layer_context(VP8_COMP *cpi, const int layer);
void vp8_new_framerate(VP8_COMP *cpi, double framerate);
void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm);
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -327,7 +327,8 @@
int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
/* Boost depends somewhat on frame rate: only used for 1 layer case. */
if (cpi->oxcf.number_of_layers == 1) {
- kf_boost = VPXMAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
+ kf_boost =
+ VPXMAX(initial_boost, (int)round(2 * cpi->output_framerate - 16));
} else {
/* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
kf_boost = initial_boost;
@@ -349,8 +350,12 @@
}
if (cpi->oxcf.rc_max_intra_bitrate_pct) {
- unsigned int max_rate =
- cpi->per_frame_bandwidth * cpi->oxcf.rc_max_intra_bitrate_pct / 100;
+ unsigned int max_rate;
+ // This product may overflow unsigned int
+ uint64_t product = cpi->per_frame_bandwidth;
+ product *= cpi->oxcf.rc_max_intra_bitrate_pct;
+ product /= 100;
+ max_rate = (unsigned int)VPXMIN(INT_MAX, product);
if (target > max_rate) target = max_rate;
}
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -152,8 +152,8 @@
RANGE_CHECK_HI(cfg, g_lag_in_frames, 25);
#endif
RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q);
- RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000);
- RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000);
+ RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100);
+ RANGE_CHECK_HI(cfg, rc_overshoot_pct, 100);
RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO);
@@ -257,6 +257,23 @@
ERROR("g_threads cannot be bigger than number of token partitions");
#endif
+ // The range below shall be further tuned.
+ RANGE_CHECK(cfg, use_vizier_rc_params, 0, 1);
+ RANGE_CHECK(cfg, active_wq_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, err_per_mb_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, sr_default_decay_limit.den, 1, 1000);
+ RANGE_CHECK(cfg, sr_diff_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_err_per_mb_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_frame_min_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_frame_max_boost_subs_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_max_total_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, gf_max_total_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, gf_frame_max_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, zm_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, rd_mult_inter_qp_fac.den, 1, 1000);
+ RANGE_CHECK(cfg, rd_mult_arf_qp_fac.den, 1, 1000);
+ RANGE_CHECK(cfg, rd_mult_key_qp_fac.den, 1, 1000);
+
return VPX_CODEC_OK;
}
@@ -378,6 +395,9 @@
#endif
oxcf->cpu_used = vp8_cfg.cpu_used;
+ if (cfg.g_pass == VPX_RC_FIRST_PASS) {
+ oxcf->cpu_used = VPXMAX(4, oxcf->cpu_used);
+ }
oxcf->encode_breakout = vp8_cfg.static_thresh;
oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref;
oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity;
@@ -585,6 +605,17 @@
return update_extracfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_rtc_external_ratectrl(vpx_codec_alg_priv_t *ctx,
+                                                      va_list args) {
+  VP8_COMP *cpi = ctx->cpi;
+  const unsigned int data = CAST(VP8E_SET_RTC_EXTERNAL_RATECTRL, args);
+  if (data) {  // Nonzero: apply the external RTC rate control settings.
+    cpi->cyclic_refresh_mode_enabled = 0;
+    cpi->rt_always_update_correction_factor = 1;  // Update after every frame.
+  }
+  return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg,
void **mem_loc) {
vpx_codec_err_t res = VPX_CODEC_OK;
@@ -1223,6 +1254,7 @@
{ VP8E_SET_MAX_INTRA_BITRATE_PCT, set_rc_max_intra_bitrate_pct },
{ VP8E_SET_SCREEN_CONTENT_MODE, set_screen_content_mode },
{ VP8E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct },
+ { VP8E_SET_RTC_EXTERNAL_RATECTRL, ctrl_set_rtc_external_ratectrl },
{ -1, NULL },
};
@@ -1256,7 +1288,7 @@
VPX_VBR, /* rc_end_usage */
{ NULL, 0 }, /* rc_twopass_stats_in */
{ NULL, 0 }, /* rc_firstpass_mb_stats_in */
- 256, /* rc_target_bandwidth */
+ 256, /* rc_target_bitrate */
4, /* rc_min_quantizer */
63, /* rc_max_quantizer */
100, /* rc_undershoot_pct */
@@ -1278,14 +1310,30 @@
VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
{ 0 },
- { 0 }, /* ss_target_bitrate */
- 1, /* ts_number_layers */
- { 0 }, /* ts_target_bitrate */
- { 0 }, /* ts_rate_decimator */
- 0, /* ts_periodicity */
- { 0 }, /* ts_layer_id */
- { 0 }, /* layer_target_bitrate */
- 0 /* temporal_layering_mode */
+ { 0 }, /* ss_target_bitrate */
+ 1, /* ts_number_layers */
+ { 0 }, /* ts_target_bitrate */
+ { 0 }, /* ts_rate_decimator */
+ 0, /* ts_periodicity */
+ { 0 }, /* ts_layer_id */
+ { 0 }, /* layer_target_bitrate */
+ 0, /* temporal_layering_mode */
+ 0, /* use_vizier_rc_params */
+ { 1, 1 }, /* active_wq_factor */
+ { 1, 1 }, /* err_per_mb_factor */
+ { 1, 1 }, /* sr_default_decay_limit */
+ { 1, 1 }, /* sr_diff_factor */
+ { 1, 1 }, /* kf_err_per_mb_factor */
+ { 1, 1 }, /* kf_frame_min_boost_factor */
+ { 1, 1 }, /* kf_frame_max_boost_first_factor */
+ { 1, 1 }, /* kf_frame_max_boost_subs_factor */
+ { 1, 1 }, /* kf_max_total_boost_factor */
+ { 1, 1 }, /* gf_max_total_boost_factor */
+ { 1, 1 }, /* gf_frame_max_boost_factor */
+ { 1, 1 }, /* zm_factor */
+ { 1, 1 }, /* rd_mult_inter_qp_fac */
+ { 1, 1 }, /* rd_mult_arf_qp_fac */
+ { 1, 1 }, /* rd_mult_key_qp_fac */
} },
};
--- /dev/null
+++ b/vp8/vp8_ratectrl_rtc.cc
@@ -1,0 +1,347 @@
+/*
+ * Copyright (c) 2021 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <new>
+#include "vp8/vp8_ratectrl_rtc.h"
+#include "vp8/encoder/ratectrl.h"
+#include "vpx_ports/system_state.h"
+
+namespace libvpx {
+/* Quant MOD: 64-entry map from a config quantizer to the q stored in oxcf. */
+static const int kQTrans[] = {
+ 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 12, 13, 15, 17, 18, 19,
+ 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 37, 39, 41,
+ 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 64, 67, 70, 73, 76, 79,
+ 82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112, 115, 118, 121, 124, 127,
+};
+
+static const unsigned char kf_high_motion_minq[QINDEX_RANGE] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,
+ 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5,
+ 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, 10, 10,
+ 10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 15, 15, 15, 15, 16,
+ 16, 16, 16, 17, 17, 18, 18, 18, 18, 19, 19, 20, 20, 20, 20, 21, 21, 21, 21,
+ 22, 22, 23, 23, 24, 25, 25, 26, 26, 27, 28, 28, 29, 30
+};  // active_best_quality for key frames, indexed by active_worst_quality
+
+static const unsigned char inter_minq[QINDEX_RANGE] = {
+ 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 9, 10, 11,
+ 11, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 22, 23, 24,
+ 24, 25, 26, 27, 27, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38,
+ 39, 39, 40, 41, 42, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 50, 51, 52, 53,
+ 54, 55, 55, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69,
+ 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86,
+ 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100
+};  // active_best_quality for non-key frames, indexed by active_worst_quality
+
+/* Returns val * num / denom, evaluated in 64 bits with truncating division. */
+static int rescale(int val, int num, int denom) {
+  const int64_t scaled = (int64_t)val * num;
+  const int64_t quotient = scaled / denom;
+
+  return (int)quotient;
+}
+
+// Returns a controller initialized from cfg, or nullptr if allocation fails.
+std::unique_ptr<VP8RateControlRTC> VP8RateControlRTC::Create(
+    const VP8RateControlRtcConfig &cfg) {
+  std::unique_ptr<VP8RateControlRTC> controller(new (std::nothrow)
+                                                    VP8RateControlRTC());
+  if (controller == nullptr) return nullptr;
+  controller->cpi_ =
+      static_cast<VP8_COMP *>(vpx_memalign(32, sizeof(*controller->cpi_)));
+  if (controller->cpi_ == nullptr) return nullptr;
+  vp8_zero(*controller->cpi_);
+  controller->InitRateControl(cfg);
+  return controller;
+}
+
+// Applies fixed defaults, then rc_cfg, then seeds both buffer-level trackers.
+void VP8RateControlRTC::InitRateControl(const VP8RateControlRtcConfig &rc_cfg) {
+  VP8_CONFIG *const config = &cpi_->oxcf;
+  config->end_usage = USAGE_STREAM_FROM_SERVER;
+  config->drop_frames_water_mark = 0;
+  cpi_->common.show_frame = 1;
+  cpi_->common.current_video_frame = 0;
+  cpi_->pass = 0;
+  cpi_->auto_gold = 1;
+  cpi_->auto_worst_q = 1;
+  cpi_->key_frame_count = 1;
+  cpi_->cyclic_refresh_mode_enabled = 0;
+  cpi_->rate_correction_factor = 1.0;
+  cpi_->key_frame_rate_correction_factor = 1.0;
+  cpi_->kf_overspend_bits = 0;
+  cpi_->kf_bitrate_adjustment = 0;
+  cpi_->gf_overspend_bits = 0;
+  cpi_->non_gf_bitrate_adjustment = 0;
+  UpdateRateControl(rc_cfg);
+  cpi_->buffer_level = config->starting_buffer_level;
+  cpi_->bits_off_target = config->starting_buffer_level;
+}
+
+void VP8RateControlRTC::UpdateRateControl(
+ const VP8RateControlRtcConfig &rc_cfg) {  // (Re)applies rc_cfg to cpi_.
+ VP8_COMMON *cm = &cpi_->common;
+ VP8_CONFIG *oxcf = &cpi_->oxcf;
+ vpx_clear_system_state();
+ cm->Width = rc_cfg.width;
+ cm->Height = rc_cfg.height;
+ oxcf->Width = rc_cfg.width;
+ oxcf->Height = rc_cfg.height;
+ oxcf->worst_allowed_q = kQTrans[rc_cfg.max_quantizer];
+ oxcf->best_allowed_q = kQTrans[rc_cfg.min_quantizer];
+ cpi_->worst_quality = oxcf->worst_allowed_q;
+ cpi_->best_quality = oxcf->best_allowed_q;  // NB: kQTrans index unchecked
+ cpi_->output_framerate = rc_cfg.framerate;
+ oxcf->target_bandwidth =
+ static_cast<unsigned int>(1000 * rc_cfg.target_bandwidth);
+ cpi_->ref_framerate = cpi_->output_framerate;
+ oxcf->fixed_q = -1;
+ oxcf->error_resilient_mode = 1;
+ oxcf->starting_buffer_level_in_ms = rc_cfg.buf_initial_sz;
+ oxcf->optimal_buffer_level_in_ms = rc_cfg.buf_optimal_sz;
+ oxcf->maximum_buffer_size_in_ms = rc_cfg.buf_sz;
+ oxcf->starting_buffer_level = rc_cfg.buf_initial_sz;
+ oxcf->optimal_buffer_level = rc_cfg.buf_optimal_sz;
+ oxcf->maximum_buffer_size = rc_cfg.buf_sz;
+ oxcf->number_of_layers = rc_cfg.ts_number_layers;
+ cpi_->buffered_mode = oxcf->optimal_buffer_level > 0;
+ oxcf->under_shoot_pct = rc_cfg.undershoot_pct;
+ oxcf->over_shoot_pct = rc_cfg.overshoot_pct;
+ cpi_->oxcf.rc_max_intra_bitrate_pct = rc_cfg.max_intra_bitrate_pct;
+ cpi_->framerate = rc_cfg.framerate;
+ for (int i = 0; i < KEY_FRAME_CONTEXT; ++i) {
+ cpi_->prior_key_frame_distance[i] =
+ static_cast<int>(cpi_->output_framerate);
+ }
+
+ if (oxcf->number_of_layers > 1) {
+ memcpy(oxcf->target_bitrate, rc_cfg.layer_target_bitrate,
+ sizeof(rc_cfg.layer_target_bitrate));  // NOTE(review): source-sized copy
+ memcpy(oxcf->rate_decimator, rc_cfg.ts_rate_decimator,
+ sizeof(rc_cfg.ts_rate_decimator));  // NOTE(review): source-sized copy
+ oxcf->periodicity = 2;  // fixed; not read from rc_cfg
+
+ double prev_layer_framerate = 0;
+ for (unsigned int i = 0; i < oxcf->number_of_layers; ++i) {
+ vp8_init_temporal_layer_context(cpi_, oxcf, i, prev_layer_framerate);
+ prev_layer_framerate = cpi_->output_framerate / oxcf->rate_decimator[i];
+ }
+ }
+
+ cpi_->total_actual_bits = 0;
+ cpi_->total_target_vs_actual = 0;
+
+ cm->mb_rows = cm->Height >> 4;  // Height / 16, truncated
+ cm->mb_cols = cm->Width >> 4;  // Width / 16, truncated
+ cm->MBs = cm->mb_rows * cm->mb_cols;
+ cm->mode_info_stride = cm->mb_cols + 1;
+
+ oxcf->starting_buffer_level =
+ rescale((int)oxcf->starting_buffer_level, oxcf->target_bandwidth, 1000);
+ /* Set or reset optimal and maximum buffer levels. */
+ if (oxcf->optimal_buffer_level == 0) {
+ oxcf->optimal_buffer_level = oxcf->target_bandwidth / 8;
+ } else {
+ oxcf->optimal_buffer_level =
+ rescale((int)oxcf->optimal_buffer_level, oxcf->target_bandwidth, 1000);
+ }
+ if (oxcf->maximum_buffer_size == 0) {
+ oxcf->maximum_buffer_size = oxcf->target_bandwidth / 8;
+ } else {
+ oxcf->maximum_buffer_size =
+ rescale((int)oxcf->maximum_buffer_size, oxcf->target_bandwidth, 1000);
+ }
+
+ if (cpi_->bits_off_target > oxcf->maximum_buffer_size) {  // clamp
+ cpi_->bits_off_target = oxcf->maximum_buffer_size;
+ cpi_->buffer_level = cpi_->bits_off_target;
+ }
+
+ vp8_new_framerate(cpi_, cpi_->framerate);
+ vpx_clear_system_state();
+}
+
+void VP8RateControlRTC::ComputeQP(const VP8FrameParamsQpRTC &frame_params) {
+ VP8_COMMON *const cm = &cpi_->common;  // Picks q_ for the coming frame.
+ vpx_clear_system_state();
+ if (cpi_->oxcf.number_of_layers > 1) {
+ cpi_->temporal_layer_id = frame_params.temporal_layer_id;
+ const int layer = frame_params.temporal_layer_id;
+ vp8_update_layer_contexts(cpi_);
+ /* Restore layer specific context & set frame rate */
+ vp8_restore_layer_context(cpi_, layer);
+ vp8_new_framerate(cpi_, cpi_->layer_context[layer].framerate);
+ }
+ cm->frame_type = frame_params.frame_type;
+ cm->refresh_golden_frame = (cm->frame_type == KEY_FRAME) ? 1 : 0;
+ cm->refresh_alt_ref_frame = (cm->frame_type == KEY_FRAME) ? 1 : 0;
+ if (cm->frame_type == KEY_FRAME && cpi_->common.current_video_frame > 0) {
+ cpi_->common.frame_flags |= FRAMEFLAGS_KEY;
+ }
+
+ vp8_pick_frame_size(cpi_);  // presumably sets this_frame_target - confirm
+
+ if (cpi_->buffer_level >= cpi_->oxcf.optimal_buffer_level &&
+ cpi_->buffered_mode) {
+ /* Max adjustment is 1/4 of active_worst_quality */
+ int Adjustment = cpi_->active_worst_quality / 4;
+ if (Adjustment) {
+ int buff_lvl_step;
+ if (cpi_->buffer_level < cpi_->oxcf.maximum_buffer_size) {
+ buff_lvl_step = (int)((cpi_->oxcf.maximum_buffer_size -
+ cpi_->oxcf.optimal_buffer_level) /
+ Adjustment);
+ if (buff_lvl_step) {
+ Adjustment =
+ (int)((cpi_->buffer_level - cpi_->oxcf.optimal_buffer_level) /
+ buff_lvl_step);
+ } else {
+ Adjustment = 0;
+ }
+ }
+ cpi_->active_worst_quality -= Adjustment;
+ if (cpi_->active_worst_quality < cpi_->active_best_quality) {
+ cpi_->active_worst_quality = cpi_->active_best_quality;
+ }
+ }
+ }
+
+ if (cpi_->ni_frames > 150) {  // only after 150 non-key frames
+ int q = cpi_->active_worst_quality;
+ if (cm->frame_type == KEY_FRAME) {
+ cpi_->active_best_quality = kf_high_motion_minq[q];
+ } else {
+ cpi_->active_best_quality = inter_minq[q];
+ }
+
+ if (cpi_->buffer_level >= cpi_->oxcf.maximum_buffer_size) {
+ cpi_->active_best_quality = cpi_->best_quality;
+
+ } else if (cpi_->buffer_level > cpi_->oxcf.optimal_buffer_level) {
+ int Fraction =
+ (int)(((cpi_->buffer_level - cpi_->oxcf.optimal_buffer_level) * 128) /
+ (cpi_->oxcf.maximum_buffer_size -
+ cpi_->oxcf.optimal_buffer_level));
+ int min_qadjustment =
+ ((cpi_->active_best_quality - cpi_->best_quality) * Fraction) / 128;
+
+ cpi_->active_best_quality -= min_qadjustment;
+ }
+ }
+
+ /* Clip the active best and worst quality values to limits */
+ if (cpi_->active_worst_quality > cpi_->worst_quality) {
+ cpi_->active_worst_quality = cpi_->worst_quality;
+ }
+ if (cpi_->active_best_quality < cpi_->best_quality) {
+ cpi_->active_best_quality = cpi_->best_quality;
+ }
+ if (cpi_->active_worst_quality < cpi_->active_best_quality) {
+ cpi_->active_worst_quality = cpi_->active_best_quality;
+ }
+
+ q_ = vp8_regulate_q(cpi_, cpi_->this_frame_target);
+ vp8_set_quantizer(cpi_, q_);
+ vpx_clear_system_state();
+}
+
+int VP8RateControlRTC::GetQP() const { return q_; }  // q_ set by ComputeQP()
+
+void VP8RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) {
+ VP8_COMMON *const cm = &cpi_->common;  // Feeds the coded size back to RC.
+ vpx_clear_system_state();
+ cpi_->total_byte_count += encoded_frame_size;  // presumably bytes - confirm
+ cpi_->projected_frame_size = static_cast<int>(encoded_frame_size << 3);
+ if (cpi_->oxcf.number_of_layers > 1) {
+ for (unsigned int i = cpi_->current_layer + 1;
+ i < cpi_->oxcf.number_of_layers; ++i) {
+ cpi_->layer_context[i].total_byte_count += encoded_frame_size;
+ }
+ }
+
+ vp8_update_rate_correction_factors(cpi_, 2);  // meaning of 2 not shown here
+
+ cpi_->last_q[cm->frame_type] = cm->base_qindex;
+
+ if (cm->frame_type == KEY_FRAME) {
+ vp8_adjust_key_frame_context(cpi_);
+ }
+
+ /* Keep a record of ambient average Q. */
+ if (cm->frame_type != KEY_FRAME) {
+ cpi_->avg_frame_qindex =
+ (2 + 3 * cpi_->avg_frame_qindex + cm->base_qindex) >> 2;
+ }
+ /* Keep a record from which we can calculate the average Q excluding
+ * key frames.
+ */
+ if (cm->frame_type != KEY_FRAME) {
+ cpi_->ni_frames++;
+ /* Damp value for first few frames (the else branch below) */
+ if (cpi_->ni_frames > 150) {
+ cpi_->ni_tot_qi += q_;
+ cpi_->ni_av_qi = (cpi_->ni_tot_qi / cpi_->ni_frames);
+ } else {
+ cpi_->ni_tot_qi += q_;
+ cpi_->ni_av_qi =
+ ((cpi_->ni_tot_qi / cpi_->ni_frames) + cpi_->worst_quality + 1) / 2;
+ }
+
+ /* If the average Q is higher than what was used in the last
+ * frame (after going through the recode loop to keep the frame
+ * size within range) then use the last frame value - 1. The -1
+ * is designed to stop Q and hence the data rate, from
+ * progressively falling away during difficult sections, but at
+ * the same time reduce the number of iterations around the
+ * recode loop.
+ */
+ if (q_ > cpi_->ni_av_qi) cpi_->ni_av_qi = q_ - 1;
+ }
+
+ cpi_->bits_off_target +=
+ cpi_->av_per_frame_bandwidth - cpi_->projected_frame_size;
+ if (cpi_->bits_off_target > cpi_->oxcf.maximum_buffer_size) {
+ cpi_->bits_off_target = cpi_->oxcf.maximum_buffer_size;
+ }
+
+ cpi_->total_actual_bits += cpi_->projected_frame_size;
+ cpi_->buffer_level = cpi_->bits_off_target;
+
+ /* Propagate values to higher temporal layers */
+ if (cpi_->oxcf.number_of_layers > 1) {
+ for (unsigned int i = cpi_->current_layer + 1;
+ i < cpi_->oxcf.number_of_layers; ++i) {
+ LAYER_CONTEXT *lc = &cpi_->layer_context[i];
+ int bits_off_for_this_layer = (int)round(
+ lc->target_bandwidth / lc->framerate - cpi_->projected_frame_size);
+
+ lc->bits_off_target += bits_off_for_this_layer;
+
+ /* Clip buffer level to maximum buffer size for the layer */
+ if (lc->bits_off_target > lc->maximum_buffer_size) {
+ lc->bits_off_target = lc->maximum_buffer_size;
+ }
+
+ lc->total_actual_bits += cpi_->projected_frame_size;
+ lc->total_target_vs_actual += bits_off_for_this_layer;
+ lc->buffer_level = lc->bits_off_target;
+ }
+ }
+
+ cpi_->common.current_video_frame++;
+ cpi_->frames_since_key++;
+
+ if (cpi_->oxcf.number_of_layers > 1) vp8_save_layer_context(cpi_);
+ vpx_clear_system_state();
+}
+} // namespace libvpx
--- /dev/null
+++ b/vp8/vp8_ratectrl_rtc.h
@@ -1,0 +1,63 @@
+/*
+ * Copyright (c) 2021 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VP8_RATECTRL_RTC_H_
+#define VPX_VP8_RATECTRL_RTC_H_
+
+#include <cstdint>
+#include <memory>
+
+#include "vp8/encoder/onyx_int.h"
+#include "vp8/common/common.h"
+#include "vpx/internal/vpx_ratectrl_rtc.h"
+
+namespace libvpx {
+// VP8 config: zeroes layer_target_bitrate and ts_rate_decimator on creation.
+struct VP8RateControlRtcConfig : public VpxRateControlRtcConfig {
+  VP8RateControlRtcConfig() {
+    vp8_zero(layer_target_bitrate);
+    vp8_zero(ts_rate_decimator);
+  }
+};
+
+struct VP8FrameParamsQpRTC {
+ FRAME_TYPE frame_type;  // copied into cm->frame_type by ComputeQP()
+ int temporal_layer_id;  // read by ComputeQP() only with more than one layer
+};
+
+class VP8RateControlRTC {  // VP8 rate control fed with externally coded sizes.
+ public:
+  static std::unique_ptr<VP8RateControlRTC> Create(
+      const VP8RateControlRtcConfig &cfg);
+  ~VP8RateControlRTC() {
+    if (cpi_) {
+      vpx_free(cpi_->gf_active_flags);
+      vpx_free(cpi_);
+    }
+  }
+  // Applies rc_cfg to the internal state; Create() already runs it once.
+  void UpdateRateControl(const VP8RateControlRtcConfig &rc_cfg);
+  // GetQP() needs to be called after ComputeQP() to get the latest QP
+  int GetQP() const;
+  // int GetLoopfilterLevel() const;
+  void ComputeQP(const VP8FrameParamsQpRTC &frame_params);
+  // Feedback to rate control with the size of current encoded frame
+  void PostEncodeUpdate(uint64_t encoded_frame_size);
+
+ private:
+  VP8RateControlRTC() : cpi_(nullptr), q_(0) {}  // no indeterminate members
+  void InitRateControl(const VP8RateControlRtcConfig &cfg);
+  VP8_COMP *cpi_;
+  int q_;
+};
+
+} // namespace libvpx
+
+#endif // VPX_VP8_RATECTRL_RTC_H_
--- a/vp9/encoder/arm/neon/vp9_denoiser_neon.c
+++ b/vp9/encoder/arm/neon/vp9_denoiser_neon.c
@@ -21,6 +21,9 @@
// Compute the sum of all pixel differences of this MB.
static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
+#if defined(__aarch64__)
+ return vaddlvq_s8(v_sum_diff_total);
+#else
const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);
const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210);
@@ -28,6 +31,7 @@
vget_low_s64(fedcba98_76543210));
const int sum_diff = vget_lane_s32(vreinterpret_s32_s64(x), 0);
return sum_diff;
+#endif
}
// Denoise a 16x1 vector.
--- a/vp9/encoder/vp9_aq_cyclicrefresh.c
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.c
@@ -48,6 +48,7 @@
assert(MAXQ <= 255);
memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size);
cr->counter_encode_maxq_scene_change = 0;
+ cr->content_mode = 1;
return cr;
}
@@ -326,7 +327,8 @@
else
rc->baseline_gf_interval = 40;
if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20;
- if (rc->avg_frame_low_motion < 50 && rc->frames_since_key > 40)
+ if (rc->avg_frame_low_motion < 50 && rc->frames_since_key > 40 &&
+ cr->content_mode)
rc->baseline_gf_interval = 10;
}
@@ -388,7 +390,8 @@
? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex)
: vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex);
// More aggressive settings for noisy content.
- if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
+ if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium &&
+ cr->content_mode) {
consec_zero_mv_thresh = 60;
qindex_thresh =
VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex),
@@ -409,7 +412,7 @@
#if CONFIG_VP9_HIGHBITDEPTH
if (cpi->common.use_highbitdepth) compute_content = 0;
#endif
- if (cpi->Last_Source == NULL ||
+ if (cr->content_mode == 0 || cpi->Last_Source == NULL ||
cpi->Last_Source->y_width != cpi->Source->y_width ||
cpi->Last_Source->y_height != cpi->Source->y_height)
compute_content = 0;
@@ -430,7 +433,8 @@
// reset to 0 later depending on the coding mode.
if (cr->map[bl_index2] == 0) {
count_tot++;
- if (cr->last_coded_q_map[bl_index2] > qindex_thresh ||
+ if (cr->content_mode == 0 ||
+ cr->last_coded_q_map[bl_index2] > qindex_thresh ||
cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh_block) {
sum_map++;
count_sel++;
@@ -489,7 +493,8 @@
rc->avg_frame_qindex[INTER_FRAME] < qp_thresh ||
(cpi->use_svc &&
cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) ||
- (!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion &&
+ (!cpi->use_svc && cr->content_mode &&
+ rc->avg_frame_low_motion < thresh_low_motion &&
rc->frames_since_key > 40) ||
(!cpi->use_svc && rc->avg_frame_qindex[INTER_FRAME] > qp_max_thresh &&
rc->frames_since_key > 20)) {
@@ -511,7 +516,8 @@
cr->rate_ratio_qdelta = 3.0;
} else {
cr->rate_ratio_qdelta = 2.0;
- if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) {
+ if (cr->content_mode && cpi->noise_estimate.enabled &&
+ cpi->noise_estimate.level >= kMedium) {
// Reduce the delta-qp if the estimated source noise is above threshold.
cr->rate_ratio_qdelta = 1.7;
cr->rate_boost_fac = 13;
@@ -528,7 +534,7 @@
cr->percent_refresh = (cr->skip_flat_static_blocks) ? 5 : 10;
// Increase the amount of refresh on scene change that is encoded at max Q,
// increase for a few cycles of the refresh period (~100 / percent_refresh).
- if (cr->counter_encode_maxq_scene_change < 30)
+ if (cr->content_mode && cr->counter_encode_maxq_scene_change < 30)
cr->percent_refresh = (cr->skip_flat_static_blocks) ? 10 : 15;
cr->rate_ratio_qdelta = 2.0;
cr->rate_boost_fac = 10;
@@ -575,6 +581,12 @@
(double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) /
num8x8bl;
cr->weight_segment = weight_segment;
+ if (cr->content_mode == 0) {
+ cr->actual_num_seg1_blocks =
+ cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100;
+ cr->actual_num_seg2_blocks = 0;
+ cr->weight_segment = (double)(cr->actual_num_seg1_blocks) / num8x8bl;
+ }
}
// Setup cyclic background refresh: set delta q and segmentation map.
--- a/vp9/encoder/vp9_aq_cyclicrefresh.h
+++ b/vp9/encoder/vp9_aq_cyclicrefresh.h
@@ -70,6 +70,7 @@
int apply_cyclic_refresh;
int counter_encode_maxq_scene_change;
int skip_flat_static_blocks;
+ int content_mode;
};
struct VP9_COMP;
--- a/vp9/encoder/vp9_block.h
+++ b/vp9/encoder/vp9_block.h
@@ -157,6 +157,9 @@
// skip forward transform and quantization
uint8_t skip_txfm[MAX_MB_PLANE << 2];
#define SKIP_TXFM_NONE 0
+// TODO(chengchen): consider removing SKIP_TXFM_AC_DC from vp9 completely
+// since it increases risks of bad perceptual quality.
+// https://crbug.com/webm/1729
#define SKIP_TXFM_AC_DC 1
#define SKIP_TXFM_AC_ONLY 2
--- a/vp9/encoder/vp9_encodeframe.c
+++ b/vp9/encoder/vp9_encodeframe.c
@@ -159,37 +159,6 @@
}
#endif // CONFIG_VP9_HIGHBITDEPTH
-#if !CONFIG_REALTIME_ONLY
-static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
- const struct buf_2d *ref,
- int mi_row, int mi_col,
- BLOCK_SIZE bs) {
- unsigned int sse, var;
- uint8_t *last_y;
- const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME);
-
- assert(last != NULL);
- last_y =
- &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE];
- var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse);
- return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]);
-}
-
-static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
- int mi_row, int mi_col) {
- unsigned int var = get_sby_perpixel_diff_variance(
- cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
- if (var < 8)
- return BLOCK_64X64;
- else if (var < 128)
- return BLOCK_32X32;
- else if (var < 2048)
- return BLOCK_16X16;
- else
- return BLOCK_8X8;
-}
-#endif // !CONFIG_REALTIME_ONLY
-
static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row,
int mi_col, BLOCK_SIZE bsize, int segment_index) {
VP9_COMMON *const cm = &cpi->common;
@@ -815,8 +784,8 @@
// Check if most of the superblock is skin content, and if so, force split to
// 32x32, and set x->sb_is_skin for use in mode selection.
-static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res,
- int mi_row, int mi_col, int *force_split) {
+static int skin_sb_split(VP9_COMP *cpi, const int low_res, int mi_row,
+ int mi_col, int *force_split) {
VP9_COMMON *const cm = &cpi->common;
#if CONFIG_VP9_HIGHBITDEPTH
if (cm->use_highbitdepth) return 0;
@@ -828,11 +797,6 @@
mi_row + 8 < cm->mi_rows)) {
int num_16x16_skin = 0;
int num_16x16_nonskin = 0;
- uint8_t *ysignal = x->plane[0].src.buf;
- uint8_t *usignal = x->plane[1].src.buf;
- uint8_t *vsignal = x->plane[2].src.buf;
- int sp = x->plane[0].src.stride;
- int spuv = x->plane[1].src.stride;
const int block_index = mi_row * cm->mi_cols + mi_col;
const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64];
const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64];
@@ -851,13 +815,7 @@
i = ymis;
break;
}
- ysignal += 16;
- usignal += 8;
- vsignal += 8;
}
- ysignal += (sp << 4) - 64;
- usignal += (spuv << 3) - 32;
- vsignal += (spuv << 3) - 32;
}
if (num_16x16_skin > 12) {
*force_split = 1;
@@ -1534,8 +1492,7 @@
vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
if (cpi->use_skin_detection)
- x->sb_is_skin =
- skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split);
+ x->sb_is_skin = skin_sb_split(cpi, low_res, mi_row, mi_col, force_split);
d = xd->plane[0].dst.buf;
dp = xd->plane[0].dst.stride;
@@ -1842,7 +1799,8 @@
}
// Else for cyclic refresh mode update the segment map, set the segment id
// and then update the quantizer.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cpi->cyclic_refresh->content_mode) {
vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize,
ctx->rate, ctx->dist, x->skip, p);
}
@@ -2539,7 +2497,8 @@
if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled)) {
// Setting segmentation map for cyclic_refresh.
- if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cpi->cyclic_refresh->content_mode) {
vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize,
ctx->rate, ctx->dist, x->skip, p);
} else {
@@ -3119,54 +3078,6 @@
memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
}
-#if CONFIG_FP_MB_STATS
-const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
- 1, 2, 2, 2, 4, 4 };
-const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1,
- 2, 1, 2, 4, 2, 4 };
-const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 0, 10, 10, 30, 40,
- 40, 60, 80, 80, 90,
- 100, 100, 120 };
-const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 0, 3, 3, 7, 15,
- 15, 30, 40, 40, 60,
- 80, 80, 120 };
-const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1,
- 4, 4, 6 };
-
-typedef enum {
- MV_ZERO = 0,
- MV_LEFT = 1,
- MV_UP = 2,
- MV_RIGHT = 3,
- MV_DOWN = 4,
- MV_INVALID
-} MOTION_DIRECTION;
-
-static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
- if (fp_byte & FPMB_MOTION_ZERO_MASK) {
- return MV_ZERO;
- } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
- return MV_LEFT;
- } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
- return MV_RIGHT;
- } else if (fp_byte & FPMB_MOTION_UP_MASK) {
- return MV_UP;
- } else {
- return MV_DOWN;
- }
-}
-
-static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
- MOTION_DIRECTION that_mv) {
- if (this_mv == that_mv) {
- return 0;
- } else {
- return abs(this_mv - that_mv) == 2 ? 2 : 1;
- }
-}
-#endif
-
// Calculate prediction based on the given input features and neural net config.
// Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden
// layer.
@@ -4064,11 +3975,6 @@
BLOCK_SIZE min_size = x->min_partition_size;
BLOCK_SIZE max_size = x->max_partition_size;
-#if CONFIG_FP_MB_STATS
- unsigned int src_diff_var = UINT_MAX;
- int none_complexity = 0;
-#endif
-
int partition_none_allowed = !force_horz_split && !force_vert_split;
int partition_horz_allowed =
!force_vert_split && yss <= xss && bsize >= BLOCK_8X8;
@@ -4155,65 +4061,6 @@
save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row,
- mi_col, bsize);
- }
-#endif
-
-#if CONFIG_FP_MB_STATS
- // Decide whether we shall split directly and skip searching NONE by using
- // the first pass block statistics
- if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
- partition_none_allowed && src_diff_var > 4 &&
- cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
- int mb_row = mi_row >> 1;
- int mb_col = mi_col >> 1;
- int mb_row_end =
- VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
- int mb_col_end =
- VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
- int r, c;
-
- // compute a complexity measure, basically measure inconsistency of motion
- // vectors obtained from the first pass in the current block
- for (r = mb_row; r < mb_row_end; r++) {
- for (c = mb_col; c < mb_col_end; c++) {
- const int mb_index = r * cm->mb_cols + c;
-
- MOTION_DIRECTION this_mv;
- MOTION_DIRECTION right_mv;
- MOTION_DIRECTION bottom_mv;
-
- this_mv =
- get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);
-
- // to its right
- if (c != mb_col_end - 1) {
- right_mv = get_motion_direction_fp(
- cpi->twopass.this_frame_mb_stats[mb_index + 1]);
- none_complexity += get_motion_inconsistency(this_mv, right_mv);
- }
-
- // to its bottom
- if (r != mb_row_end - 1) {
- bottom_mv = get_motion_direction_fp(
- cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
- none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
- }
-
- // do not count its left and top neighbors to avoid double counting
- }
- }
-
- if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
- partition_none_allowed = 0;
- }
- }
-#endif
-
pc_tree->partitioning = PARTITION_NONE;
if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) {
@@ -4291,53 +4138,6 @@
}
}
}
-
-#if CONFIG_FP_MB_STATS
- // Check if every 16x16 first pass block statistics has zero
- // motion and the corresponding first pass residue is small enough.
- // If that is the case, check the difference variance between the
- // current frame and the last frame. If the variance is small enough,
- // stop further splitting in RD optimization
- if (cpi->use_fp_mb_stats && do_split != 0 &&
- cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
- int mb_row = mi_row >> 1;
- int mb_col = mi_col >> 1;
- int mb_row_end =
- VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
- int mb_col_end =
- VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
- int r, c;
-
- int skip = 1;
- for (r = mb_row; r < mb_row_end; r++) {
- for (c = mb_col; c < mb_col_end; c++) {
- const int mb_index = r * cm->mb_cols + c;
- if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
- FPMB_MOTION_ZERO_MASK) ||
- !(cpi->twopass.this_frame_mb_stats[mb_index] &
- FPMB_ERROR_SMALL_MASK)) {
- skip = 0;
- break;
- }
- }
- if (skip == 0) {
- break;
- }
- }
-
- if (skip) {
- if (src_diff_var == UINT_MAX) {
- set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
- src_diff_var = get_sby_perpixel_diff_variance(
- cpi, &x->plane[0].src, mi_row, mi_col, bsize);
- }
- if (src_diff_var < 8) {
- do_split = 0;
- do_rect = 0;
- }
- }
- }
-#endif
}
}
restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
@@ -4603,15 +4403,18 @@
encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize,
pc_tree);
#if CONFIG_RATE_CTRL
- // Store partition, motion vector of the superblock.
- if (output_enabled) {
- const int num_unit_rows = get_num_unit_4x4(cpi->frame_info.frame_height);
- const int num_unit_cols = get_num_unit_4x4(cpi->frame_info.frame_width);
- store_superblock_info(pc_tree, cm->mi_grid_visible, cm->mi_stride,
- num_4x4_blocks_wide_lookup[BLOCK_64X64],
- num_unit_rows, num_unit_cols, mi_row << 1,
- mi_col << 1, cpi->partition_info,
- cpi->motion_vector_info);
+ if (oxcf->use_simple_encode_api) {
+ // Store partition, motion vector of the superblock.
+ if (output_enabled) {
+ const int num_unit_rows =
+ get_num_unit_4x4(cpi->frame_info.frame_height);
+ const int num_unit_cols = get_num_unit_4x4(cpi->frame_info.frame_width);
+ store_superblock_info(pc_tree, cm->mi_grid_visible, cm->mi_stride,
+ num_4x4_blocks_wide_lookup[BLOCK_64X64],
+ num_unit_rows, num_unit_cols, mi_row << 1,
+ mi_col << 1, cpi->partition_info,
+ cpi->motion_vector_info);
+ }
}
#endif // CONFIG_RATE_CTRL
}
@@ -4700,13 +4503,6 @@
set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
&dummy_rate, &dummy_dist, 1, td->pc_root);
- } else if (cpi->partition_search_skippable_frame) {
- BLOCK_SIZE bsize;
- set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
- bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
- set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
- rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64,
- &dummy_rate, &dummy_dist, 1, td->pc_root);
} else if (sf->partition_search_type == VAR_BASED_PARTITION &&
cm->frame_type != KEY_FRAME) {
choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
@@ -5981,9 +5777,14 @@
for (i = 0; i < BLOCK_SIZES; ++i) {
for (j = 0; j < MAX_MODES; ++j) {
tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT;
-#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
+#if CONFIG_RATE_CTRL
+ if (cpi->oxcf.use_simple_encode_api) {
+ tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
+ }
+#endif // CONFIG_RATE_CTRL
+#if CONFIG_CONSISTENT_RECODE
tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT;
-#endif // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
+#endif // CONFIG_CONSISTENT_RECODE
tile_data->mode_map[i][j] = j;
}
}
@@ -6072,20 +5873,6 @@
vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col);
}
-#if CONFIG_FP_MB_STATS
-static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
- VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
- uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start +
- cm->current_video_frame * cm->MBs * sizeof(uint8_t);
-
- if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF;
-
- *this_frame_mb_stats = mb_stats_in;
-
- return 1;
-}
-#endif
-
static int compare_kmeans_data(const void *a, const void *b) {
if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) {
return 1;
@@ -6292,13 +6079,6 @@
struct vpx_usec_timer emr_timer;
vpx_usec_timer_start(&emr_timer);
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
- &cpi->twopass.this_frame_mb_stats);
- }
-#endif
-
if (!cpi->row_mt) {
cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
@@ -6406,7 +6186,12 @@
void vp9_encode_frame(VP9_COMP *cpi) {
VP9_COMMON *const cm = &cpi->common;
-#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
+#if CONFIG_RATE_CTRL
+ if (cpi->oxcf.use_simple_encode_api) {
+ restore_encode_params(cpi);
+ }
+#endif // CONFIG_RATE_CTRL
+#if CONFIG_CONSISTENT_RECODE
restore_encode_params(cpi);
#endif
@@ -6703,7 +6488,8 @@
++td->counts->tx.tx_totals[mi->tx_size];
++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])];
- if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ &&
+ cpi->cyclic_refresh->content_mode)
vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize);
if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 &&
(!cpi->use_svc ||
--- a/vp9/encoder/vp9_encoder.c
+++ b/vp9/encoder/vp9_encoder.c
@@ -654,10 +654,15 @@
}
static int check_seg_range(int seg_data[8], int range) {
- return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range ||
- abs(seg_data[2]) > range || abs(seg_data[3]) > range ||
- abs(seg_data[4]) > range || abs(seg_data[5]) > range ||
- abs(seg_data[6]) > range || abs(seg_data[7]) > range);
+ int i;
+ for (i = 0; i < 8; ++i) {
+ // Note abs() alone can't be used as the behavior of abs(INT_MIN) is
+ // undefined.
+ if (seg_data[i] > range || seg_data[i] < -range) {
+ return 0;
+ }
+ }
+ return 1;
}
VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) {
@@ -1022,10 +1027,12 @@
cpi->mi_ssim_rdmult_scaling_factors = NULL;
#if CONFIG_RATE_CTRL
- free_partition_info(cpi);
- free_motion_vector_info(cpi);
- free_fp_motion_vector_info(cpi);
- free_tpl_stats_info(cpi);
+ if (cpi->oxcf.use_simple_encode_api) {
+ free_partition_info(cpi);
+ free_motion_vector_info(cpi);
+ free_fp_motion_vector_info(cpi);
+ free_tpl_stats_info(cpi);
+ }
#endif
vp9_free_ref_frame_buffers(cm->buffer_pool);
@@ -2302,6 +2309,7 @@
cm, cm->frame_contexts,
(FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts)));
+ cpi->compute_frame_low_motion_onepass = 1;
cpi->use_svc = 0;
cpi->resize_state = ORIG;
cpi->external_resize = 0;
@@ -2317,9 +2325,9 @@
cpi->frame_info = vp9_get_frame_info(oxcf);
vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc);
+ vp9_init_rd_parameters(cpi);
init_frame_indexes(cm);
- cpi->partition_search_skippable_frame = 0;
cpi->tile_data = NULL;
realloc_segmentation_maps(cpi);
@@ -2360,17 +2368,6 @@
vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1));
}
-#if CONFIG_FP_MB_STATS
- cpi->use_fp_mb_stats = 0;
- if (cpi->use_fp_mb_stats) {
- // a place holder used to store the first pass mb stats in the first pass
- CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf,
- vpx_calloc(cm->MBs * sizeof(uint8_t), 1));
- } else {
- cpi->twopass.frame_mb_stats_buf = NULL;
- }
-#endif
-
cpi->refresh_alt_ref_frame = 0;
cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS;
@@ -2464,7 +2461,12 @@
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
- vp9_extrc_init(&cpi->ext_ratectrl);
+ {
+ vpx_codec_err_t codec_status = vp9_extrc_init(&cpi->ext_ratectrl);
+ if (codec_status != VPX_CODEC_OK) {
+ vpx_internal_error(&cm->error, codec_status, "vp9_extrc_init() failed");
+ }
+ }
#if !CONFIG_REALTIME_ONLY
if (oxcf->pass == 1) {
@@ -2518,19 +2520,7 @@
vp9_init_second_pass_spatial_svc(cpi);
} else {
int num_frames;
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- const size_t psz = cpi->common.MBs * sizeof(uint8_t);
- const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz);
- cpi->twopass.firstpass_mb_stats.mb_stats_start =
- oxcf->firstpass_mb_stats_in.buf;
- cpi->twopass.firstpass_mb_stats.mb_stats_end =
- cpi->twopass.firstpass_mb_stats.mb_stats_start +
- (ps - 1) * cpi->common.MBs * sizeof(uint8_t);
- }
-#endif
-
cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf;
cpi->twopass.stats_in = cpi->twopass.stats_in_start;
cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1];
@@ -2663,10 +2653,12 @@
#if CONFIG_RATE_CTRL
encode_command_init(&cpi->encode_command);
- partition_info_init(cpi);
- motion_vector_info_init(cpi);
- fp_motion_vector_info_init(cpi);
- tpl_stats_info_init(cpi);
+ if (oxcf->use_simple_encode_api) {
+ partition_info_init(cpi);
+ motion_vector_info_init(cpi);
+ fp_motion_vector_info_init(cpi);
+ tpl_stats_info_init(cpi);
+ }
#endif
return cpi;
@@ -2831,13 +2823,6 @@
vpx_free(cpi->mbgraph_stats[i].mb_stats);
}
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- vpx_free(cpi->twopass.frame_mb_stats_buf);
- cpi->twopass.frame_mb_stats_buf = NULL;
- }
-#endif
-
vp9_extrc_delete(&cpi->ext_ratectrl);
vp9_remove_common(cm);
@@ -3698,6 +3683,10 @@
cpi->rc.force_max_q = 0;
}
+ if (cpi->use_svc) {
+ cpi->svc.base_qindex[cpi->svc.spatial_layer_id] = *q;
+ }
+
if (!frame_is_intra_only(cm)) {
vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH);
}
@@ -4198,7 +4187,7 @@
// Update some stats from cyclic refresh, and check for golden frame update.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
- !frame_is_intra_only(cm))
+ !frame_is_intra_only(cm) && cpi->cyclic_refresh->content_mode)
vp9_cyclic_refresh_postencode(cpi);
// Update the skip mb flag probabilities based on the distribution
@@ -4392,11 +4381,24 @@
int frame_over_shoot_limit;
int frame_under_shoot_limit;
int q = 0, q_low = 0, q_high = 0;
+ int last_q_attempt = 0;
int enable_acl;
#ifdef AGGRESSIVE_VBR
int qrange_adj = 1;
#endif
+ // A flag which indicates whether we are recoding the current frame
+ // when the current frame size is larger than the max frame size in the
+ // external rate control model.
+ // This flag doesn't have any impact when external rate control is not used.
+ int ext_rc_recode = 0;
+ // Maximal frame size allowed by the external rate control.
+ // case: 0, we ignore the max frame size limit, and encode with the qindex
+ // passed in by the external rate control model.
+ // case: -1, we take VP9's decision for the max frame size.
+ int ext_rc_max_frame_size = 0;
+ const int orig_rc_max_frame_bandwidth = rc->max_frame_bandwidth;
+
#if CONFIG_RATE_CTRL
const FRAME_UPDATE_TYPE update_type =
cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
@@ -4451,11 +4453,6 @@
loop_at_this_size = 0;
}
-#if CONFIG_RATE_CTRL
- if (cpi->encode_command.use_external_target_frame_bits) {
- q = rq_model_predict_q_index(rq_model, rq_history, rc->this_frame_target);
- }
-#endif // CONFIG_RATE_CTRL
// Decide frame size bounds first time through.
if (loop_count == 0) {
vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target,
@@ -4498,22 +4495,36 @@
#if CONFIG_RATE_CTRL
// TODO(angiebird): This is a hack for making sure the encoder use the
// external_quantize_index exactly. Avoid this kind of hack later.
- if (cpi->encode_command.use_external_quantize_index) {
- q = cpi->encode_command.external_quantize_index;
+ if (cpi->oxcf.use_simple_encode_api) {
+ if (cpi->encode_command.use_external_target_frame_bits) {
+ q = rq_model_predict_q_index(rq_model, rq_history,
+ rc->this_frame_target);
+ }
+ if (cpi->encode_command.use_external_quantize_index) {
+ q = cpi->encode_command.external_quantize_index;
+ }
}
-#endif
- if (cpi->ext_ratectrl.ready) {
+#endif // CONFIG_RATE_CTRL
+ if (cpi->ext_ratectrl.ready && !ext_rc_recode) {
+ vpx_codec_err_t codec_status;
const GF_GROUP *gf_group = &cpi->twopass.gf_group;
vpx_rc_encodeframe_decision_t encode_frame_decision;
FRAME_UPDATE_TYPE update_type = gf_group->update_type[gf_group->index];
const int ref_frame_flags = get_ref_frame_flags(cpi);
RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
+ const RefCntBuffer *curr_frame_buf =
+ get_ref_cnt_buffer(cm, cm->new_fb_idx);
get_ref_frame_bufs(cpi, ref_frame_bufs);
- vp9_extrc_get_encodeframe_decision(
- &cpi->ext_ratectrl, cm->current_video_frame,
+ codec_status = vp9_extrc_get_encodeframe_decision(
+ &cpi->ext_ratectrl, curr_frame_buf->frame_index,
cm->current_frame_coding_index, gf_group->index, update_type,
ref_frame_bufs, ref_frame_flags, &encode_frame_decision);
+ if (codec_status != VPX_CODEC_OK) {
+ vpx_internal_error(&cm->error, codec_status,
+ "vp9_extrc_get_encodeframe_decision() failed");
+ }
q = encode_frame_decision.q_index;
+ ext_rc_max_frame_size = encode_frame_decision.max_frame_size;
}
vp9_set_quantizer(cpi, q);
@@ -4555,36 +4566,61 @@
}
if (cpi->ext_ratectrl.ready) {
- break;
+ last_q_attempt = q;
+ // In general, for the external rate control, we take the qindex provided
+ // as input and encode the frame with this qindex faithfully. However,
+ // in some extreme scenarios, the provided qindex leads to a massive
+ // overshoot of frame size. In this case, we fall back to VP9's decision
+ // to pick a new qindex and recode the frame. We return the new qindex
+ // through the API to the external model.
+ if (ext_rc_max_frame_size == 0) {
+ break;
+ } else if (ext_rc_max_frame_size == -1) {
+ if (rc->projected_frame_size < rc->max_frame_bandwidth) {
+ break;
+ }
+ } else {
+ if (rc->projected_frame_size < ext_rc_max_frame_size) {
+ break;
+ }
+ }
+ rc->max_frame_bandwidth = ext_rc_max_frame_size;
+ // If the current frame size exceeds the ext_rc_max_frame_size,
+ // we adjust the worst qindex to meet the frame size constraint.
+ q_high = 255;
+ ext_rc_recode = 1;
}
#if CONFIG_RATE_CTRL
- // This part needs to be after save_coding_context() because
- // restore_coding_context will be called in the end of this function.
- // TODO(angiebird): This is a hack for making sure the encoder use the
- // external_quantize_index exactly. Avoid this kind of hack later.
- if (cpi->encode_command.use_external_quantize_index) {
- break;
- }
+ if (cpi->oxcf.use_simple_encode_api) {
+ // This part needs to be after save_coding_context() because
+ // restore_coding_context will be called in the end of this function.
+ // TODO(angiebird): This is a hack for making sure the encoder use the
+ // external_quantize_index exactly. Avoid this kind of hack later.
+ if (cpi->encode_command.use_external_quantize_index) {
+ break;
+ }
- if (cpi->encode_command.use_external_target_frame_bits) {
- const double percent_diff = get_bits_percent_diff(
- rc->this_frame_target, rc->projected_frame_size);
- update_rq_history(rq_history, rc->this_frame_target,
- rc->projected_frame_size, q);
- loop_count += 1;
+ if (cpi->encode_command.use_external_target_frame_bits) {
+ const double percent_diff = get_bits_percent_diff(
+ rc->this_frame_target, rc->projected_frame_size);
+ update_rq_history(rq_history, rc->this_frame_target,
+ rc->projected_frame_size, q);
+ loop_count += 1;
- rq_model_update(rq_history, rc->this_frame_target, rq_model);
+ rq_model_update(rq_history, rc->this_frame_target, rq_model);
- // Check if we hit the target bitrate.
- if (percent_diff <= cpi->encode_command.target_frame_bits_error_percent ||
- rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM ||
- rq_history->q_index_low >= rq_history->q_index_high) {
- break;
- }
+ // Check if we hit the target bitrate.
+ if (percent_diff <=
+ cpi->encode_command.target_frame_bits_error_percent ||
+ rq_history->recode_count >= RATE_CTRL_MAX_RECODE_NUM ||
+ rq_history->q_index_low >= rq_history->q_index_high) {
+ break;
+ }
- loop = 1;
- restore_coding_context(cpi);
- continue;
+ loop = 1;
+ restore_coding_context(cpi);
+ continue;
+ }
}
#endif // CONFIG_RATE_CTRL
@@ -4754,6 +4790,23 @@
rc->projected_frame_size < rc->max_frame_bandwidth)
loop = 0;
+ // Special handling of external max frame size constraint
+ if (ext_rc_recode) {
+ // If the largest q is not able to meet the max frame size limit,
+ // do nothing.
+ if (rc->projected_frame_size > ext_rc_max_frame_size &&
+ last_q_attempt == 255) {
+ break;
+ }
+ // If VP9's q selection leads to a smaller q, we force it to use
+ // a larger q to better approximate the external max frame size
+ // constraint.
+ if (rc->projected_frame_size > ext_rc_max_frame_size &&
+ q <= last_q_attempt) {
+ q = VPXMIN(255, last_q_attempt + 1);
+ }
+ }
+
if (loop) {
++loop_count;
++loop_at_this_size;
@@ -4767,6 +4820,8 @@
if (loop) restore_coding_context(cpi);
} while (loop);
+ rc->max_frame_bandwidth = orig_rc_max_frame_bandwidth;
+
#ifdef AGGRESSIVE_VBR
if (two_pass_first_group_inter(cpi)) {
cpi->twopass.active_worst_quality =
@@ -5300,17 +5355,81 @@
}
#if !CONFIG_REALTIME_ONLY
-static void update_encode_frame_result(
+static void update_encode_frame_result_basic(
+ FRAME_UPDATE_TYPE update_type, int show_idx, int quantize_index,
+ ENCODE_FRAME_RESULT *encode_frame_result) {
+ encode_frame_result->show_idx = show_idx;
+ encode_frame_result->update_type = update_type;
+ encode_frame_result->quantize_index = quantize_index;
+}
+
+#if CONFIG_RATE_CTRL
+static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
+ IMAGE_BUFFER *image_buffer) {
+ const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,
+ yv12_buffer->v_buffer };
+ const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,
+ yv12_buffer->uv_stride };
+ const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,
+ yv12_buffer->uv_crop_width };
+ const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,
+ yv12_buffer->uv_crop_height };
+ int plane;
+ for (plane = 0; plane < 3; ++plane) {
+ const int src_stride = src_stride_ls[plane];
+ const int w = w_ls[plane];
+ const int h = h_ls[plane];
+ const uint8_t *src_buf = src_buf_ls[plane];
+ uint8_t *dst_buf = image_buffer->plane_buffer[plane];
+ int r;
+ assert(image_buffer->plane_width[plane] == w);
+ assert(image_buffer->plane_height[plane] == h);
+ for (r = 0; r < h; ++r) {
+ memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);
+ src_buf += src_stride;
+ dst_buf += w;
+ }
+ }
+}
+// This function will update extra information specific for simple_encode APIs
+static void update_encode_frame_result_simple_encode(
int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
- RefCntBuffer *ref_frame_buf[MAX_INTER_REF_FRAMES], int quantize_index,
+ RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
-#if CONFIG_RATE_CTRL
const PARTITION_INFO *partition_info,
const MOTION_VECTOR_INFO *motion_vector_info,
const TplDepStats *tpl_stats_info,
+ ENCODE_FRAME_RESULT *encode_frame_result) {
+ PSNR_STATS psnr;
+ update_encode_frame_result_basic(update_type, coded_frame_buf->frame_index,
+ quantize_index, encode_frame_result);
+#if CONFIG_VP9_HIGHBITDEPTH
+ vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth,
+ input_bit_depth);
+#else // CONFIG_VP9_HIGHBITDEPTH
+ (void)bit_depth;
+ (void)input_bit_depth;
+ vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);
+#endif // CONFIG_VP9_HIGHBITDEPTH
+ encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
+
+ vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs,
+ encode_frame_result->ref_frame_coding_indexes,
+ encode_frame_result->ref_frame_valid_list);
+
+ encode_frame_result->psnr = psnr.psnr[0];
+ encode_frame_result->sse = psnr.sse[0];
+ encode_frame_result->frame_counts = *counts;
+ encode_frame_result->partition_info = partition_info;
+ encode_frame_result->motion_vector_info = motion_vector_info;
+ encode_frame_result->tpl_stats_info = tpl_stats_info;
+ if (encode_frame_result->coded_frame.allocated) {
+ yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
+ &encode_frame_result->coded_frame);
+ }
+}
#endif // CONFIG_RATE_CTRL
- ENCODE_FRAME_RESULT *encode_frame_result);
#endif // !CONFIG_REALTIME_ONLY
static void encode_frame_to_data_rate(
@@ -5405,10 +5524,14 @@
memset(cpi->mode_chosen_counts, 0,
MAX_MODES * sizeof(*cpi->mode_chosen_counts));
#endif
-#if CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
+#if CONFIG_CONSISTENT_RECODE
// Backup to ensure consistency between recodes
save_encode_params(cpi);
-#endif // CONFIG_CONSISTENT_RECODE || CONFIG_RATE_CTRL
+#elif CONFIG_RATE_CTRL
+ if (cpi->oxcf.use_simple_encode_api) {
+ save_encode_params(cpi);
+ }
+#endif
if (cpi->sf.recode_loop == DISALLOW_RECODE) {
if (!encode_without_recode_loop(cpi, size, dest)) return;
@@ -5487,9 +5610,13 @@
{
const RefCntBuffer *coded_frame_buf =
get_ref_cnt_buffer(cm, cm->new_fb_idx);
- vp9_extrc_update_encodeframe_result(
+ vpx_codec_err_t codec_status = vp9_extrc_update_encodeframe_result(
&cpi->ext_ratectrl, (*size) << 3, cpi->Source, &coded_frame_buf->buf,
- cm->bit_depth, cpi->oxcf.input_bit_depth);
+ cm->bit_depth, cpi->oxcf.input_bit_depth, cm->base_qindex);
+ if (codec_status != VPX_CODEC_OK) {
+ vpx_internal_error(&cm->error, codec_status,
+ "vp9_extrc_update_encodeframe_result() failed");
+ }
}
#if CONFIG_REALTIME_ONLY
(void)encode_frame_result;
@@ -5496,10 +5623,12 @@
assert(encode_frame_result == NULL);
#else // CONFIG_REALTIME_ONLY
if (encode_frame_result != NULL) {
- const int ref_frame_flags = get_ref_frame_flags(cpi);
const RefCntBuffer *coded_frame_buf =
get_ref_cnt_buffer(cm, cm->new_fb_idx);
RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES];
+ FRAME_UPDATE_TYPE update_type =
+ cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index];
+ int quantize_index = vp9_get_quantizer(cpi);
get_ref_frame_bufs(cpi, ref_frame_bufs);
// update_encode_frame_result() depends on twopass.gf_group.index and
// cm->new_fb_idx, cpi->Source, cpi->lst_fb_idx, cpi->gld_fb_idx and
@@ -5517,15 +5646,21 @@
// This function needs to be called before vp9_update_reference_frames().
// TODO(angiebird): Improve the codebase to make the update of frame
// dependent variables more robust.
- update_encode_frame_result(
- ref_frame_flags,
- cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
- cpi->Source, coded_frame_buf, ref_frame_bufs, vp9_get_quantizer(cpi),
- cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts,
+
+ update_encode_frame_result_basic(update_type, coded_frame_buf->frame_index,
+ quantize_index, encode_frame_result);
#if CONFIG_RATE_CTRL
- cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info,
+ if (cpi->oxcf.use_simple_encode_api) {
+ const int ref_frame_flags = get_ref_frame_flags(cpi);
+ update_encode_frame_result_simple_encode(
+ ref_frame_flags,
+ cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index],
+ cpi->Source, coded_frame_buf, ref_frame_bufs, quantize_index,
+ cm->bit_depth, cpi->oxcf.input_bit_depth, cpi->td.counts,
+ cpi->partition_info, cpi->motion_vector_info, cpi->tpl_stats_info,
+ encode_frame_result);
+ }
#endif // CONFIG_RATE_CTRL
- encode_frame_result);
}
#endif // CONFIG_REALTIME_ONLY
@@ -5591,7 +5726,8 @@
vp9_rc_postencode_update(cpi, *size);
- if (oxcf->pass == 0 && !frame_is_intra_only(cm) &&
+ if (cpi->compute_frame_low_motion_onepass && oxcf->pass == 0 &&
+ !frame_is_intra_only(cm) &&
(!cpi->use_svc ||
(cpi->use_svc &&
!cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame &&
@@ -5680,8 +5816,13 @@
cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED;
if (cpi->common.current_frame_coding_index == 0) {
- vp9_extrc_send_firstpass_stats(&cpi->ext_ratectrl,
- &cpi->twopass.first_pass_info);
+ VP9_COMMON *cm = &cpi->common;
+ const vpx_codec_err_t codec_status = vp9_extrc_send_firstpass_stats(
+ &cpi->ext_ratectrl, &cpi->twopass.first_pass_info);
+ if (codec_status != VPX_CODEC_OK) {
+ vpx_internal_error(&cm->error, codec_status,
+ "vp9_extrc_send_firstpass_stats() failed");
+ }
}
#if CONFIG_MISMATCH_DEBUG
mismatch_move_frame_idx_w();
@@ -7440,7 +7581,9 @@
#endif // CONFIG_NON_GREEDY_MV
#if CONFIG_RATE_CTRL
- accumulate_frame_tpl_stats(cpi);
+ if (cpi->oxcf.use_simple_encode_api) {
+ accumulate_frame_tpl_stats(cpi);
+ }
#endif // CONFIG_RATE_CTRL
}
@@ -7468,206 +7611,6 @@
}
}
-#if !CONFIG_REALTIME_ONLY
-#if CONFIG_RATE_CTRL
-static void copy_frame_counts(const FRAME_COUNTS *input_counts,
- FRAME_COUNTS *output_counts) {
- int i, j, k, l, m, n;
- for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) {
- for (j = 0; j < INTRA_MODES; ++j) {
- output_counts->y_mode[i][j] = input_counts->y_mode[i][j];
- }
- }
- for (i = 0; i < INTRA_MODES; ++i) {
- for (j = 0; j < INTRA_MODES; ++j) {
- output_counts->uv_mode[i][j] = input_counts->uv_mode[i][j];
- }
- }
- for (i = 0; i < PARTITION_CONTEXTS; ++i) {
- for (j = 0; j < PARTITION_TYPES; ++j) {
- output_counts->partition[i][j] = input_counts->partition[i][j];
- }
- }
- for (i = 0; i < TX_SIZES; ++i) {
- for (j = 0; j < PLANE_TYPES; ++j) {
- for (k = 0; k < REF_TYPES; ++k) {
- for (l = 0; l < COEF_BANDS; ++l) {
- for (m = 0; m < COEFF_CONTEXTS; ++m) {
- output_counts->eob_branch[i][j][k][l][m] =
- input_counts->eob_branch[i][j][k][l][m];
- for (n = 0; n < UNCONSTRAINED_NODES + 1; ++n) {
- output_counts->coef[i][j][k][l][m][n] =
- input_counts->coef[i][j][k][l][m][n];
- }
- }
- }
- }
- }
- }
- for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) {
- for (j = 0; j < SWITCHABLE_FILTERS; ++j) {
- output_counts->switchable_interp[i][j] =
- input_counts->switchable_interp[i][j];
- }
- }
- for (i = 0; i < INTER_MODE_CONTEXTS; ++i) {
- for (j = 0; j < INTER_MODES; ++j) {
- output_counts->inter_mode[i][j] = input_counts->inter_mode[i][j];
- }
- }
- for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) {
- for (j = 0; j < 2; ++j) {
- output_counts->intra_inter[i][j] = input_counts->intra_inter[i][j];
- }
- }
- for (i = 0; i < COMP_INTER_CONTEXTS; ++i) {
- for (j = 0; j < 2; ++j) {
- output_counts->comp_inter[i][j] = input_counts->comp_inter[i][j];
- }
- }
- for (i = 0; i < REF_CONTEXTS; ++i) {
- for (j = 0; j < 2; ++j) {
- for (k = 0; k < 2; ++k) {
- output_counts->single_ref[i][j][k] = input_counts->single_ref[i][j][k];
- }
- }
- }
- for (i = 0; i < REF_CONTEXTS; ++i) {
- for (j = 0; j < 2; ++j) {
- output_counts->comp_ref[i][j] = input_counts->comp_ref[i][j];
- }
- }
- for (i = 0; i < SKIP_CONTEXTS; ++i) {
- for (j = 0; j < 2; ++j) {
- output_counts->skip[i][j] = input_counts->skip[i][j];
- }
- }
- for (i = 0; i < TX_SIZE_CONTEXTS; i++) {
- for (j = 0; j < TX_SIZES; j++) {
- output_counts->tx.p32x32[i][j] = input_counts->tx.p32x32[i][j];
- }
- for (j = 0; j < TX_SIZES - 1; j++) {
- output_counts->tx.p16x16[i][j] = input_counts->tx.p16x16[i][j];
- }
- for (j = 0; j < TX_SIZES - 2; j++) {
- output_counts->tx.p8x8[i][j] = input_counts->tx.p8x8[i][j];
- }
- }
- for (i = 0; i < TX_SIZES; i++) {
- output_counts->tx.tx_totals[i] = input_counts->tx.tx_totals[i];
- }
- for (i = 0; i < MV_JOINTS; i++) {
- output_counts->mv.joints[i] = input_counts->mv.joints[i];
- }
- for (k = 0; k < 2; k++) {
- nmv_component_counts *const comps = &output_counts->mv.comps[k];
- const nmv_component_counts *const comps_t = &input_counts->mv.comps[k];
- for (i = 0; i < 2; i++) {
- comps->sign[i] = comps_t->sign[i];
- comps->class0_hp[i] = comps_t->class0_hp[i];
- comps->hp[i] = comps_t->hp[i];
- }
- for (i = 0; i < MV_CLASSES; i++) {
- comps->classes[i] = comps_t->classes[i];
- }
- for (i = 0; i < CLASS0_SIZE; i++) {
- comps->class0[i] = comps_t->class0[i];
- for (j = 0; j < MV_FP_SIZE; j++) {
- comps->class0_fp[i][j] = comps_t->class0_fp[i][j];
- }
- }
- for (i = 0; i < MV_OFFSET_BITS; i++) {
- for (j = 0; j < 2; j++) {
- comps->bits[i][j] = comps_t->bits[i][j];
- }
- }
- for (i = 0; i < MV_FP_SIZE; i++) {
- comps->fp[i] = comps_t->fp[i];
- }
- }
-}
-
-static void yv12_buffer_to_image_buffer(const YV12_BUFFER_CONFIG *yv12_buffer,
- IMAGE_BUFFER *image_buffer) {
- const uint8_t *src_buf_ls[3] = { yv12_buffer->y_buffer, yv12_buffer->u_buffer,
- yv12_buffer->v_buffer };
- const int src_stride_ls[3] = { yv12_buffer->y_stride, yv12_buffer->uv_stride,
- yv12_buffer->uv_stride };
- const int w_ls[3] = { yv12_buffer->y_crop_width, yv12_buffer->uv_crop_width,
- yv12_buffer->uv_crop_width };
- const int h_ls[3] = { yv12_buffer->y_crop_height, yv12_buffer->uv_crop_height,
- yv12_buffer->uv_crop_height };
- int plane;
- for (plane = 0; plane < 3; ++plane) {
- const int src_stride = src_stride_ls[plane];
- const int w = w_ls[plane];
- const int h = h_ls[plane];
- const uint8_t *src_buf = src_buf_ls[plane];
- uint8_t *dst_buf = image_buffer->plane_buffer[plane];
- int r;
- assert(image_buffer->plane_width[plane] == w);
- assert(image_buffer->plane_height[plane] == h);
- for (r = 0; r < h; ++r) {
- memcpy(dst_buf, src_buf, sizeof(*src_buf) * w);
- src_buf += src_stride;
- dst_buf += w;
- }
- }
-}
-#endif // CONFIG_RATE_CTRL
-
-static void update_encode_frame_result(
- int ref_frame_flags, FRAME_UPDATE_TYPE update_type,
- const YV12_BUFFER_CONFIG *source_frame, const RefCntBuffer *coded_frame_buf,
- RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int quantize_index,
- uint32_t bit_depth, uint32_t input_bit_depth, const FRAME_COUNTS *counts,
-#if CONFIG_RATE_CTRL
- const PARTITION_INFO *partition_info,
- const MOTION_VECTOR_INFO *motion_vector_info,
- const TplDepStats *tpl_stats_info,
-#endif // CONFIG_RATE_CTRL
- ENCODE_FRAME_RESULT *encode_frame_result) {
-#if CONFIG_RATE_CTRL
- PSNR_STATS psnr;
-#if CONFIG_VP9_HIGHBITDEPTH
- vpx_calc_highbd_psnr(source_frame, &coded_frame_buf->buf, &psnr, bit_depth,
- input_bit_depth);
-#else // CONFIG_VP9_HIGHBITDEPTH
- (void)bit_depth;
- (void)input_bit_depth;
- vpx_calc_psnr(source_frame, &coded_frame_buf->buf, &psnr);
-#endif // CONFIG_VP9_HIGHBITDEPTH
- encode_frame_result->frame_coding_index = coded_frame_buf->frame_coding_index;
-
- vp9_get_ref_frame_info(update_type, ref_frame_flags, ref_frame_bufs,
- encode_frame_result->ref_frame_coding_indexes,
- encode_frame_result->ref_frame_valid_list);
-
- encode_frame_result->psnr = psnr.psnr[0];
- encode_frame_result->sse = psnr.sse[0];
- copy_frame_counts(counts, &encode_frame_result->frame_counts);
- encode_frame_result->partition_info = partition_info;
- encode_frame_result->motion_vector_info = motion_vector_info;
- encode_frame_result->tpl_stats_info = tpl_stats_info;
- if (encode_frame_result->coded_frame.allocated) {
- yv12_buffer_to_image_buffer(&coded_frame_buf->buf,
- &encode_frame_result->coded_frame);
- }
-#else // CONFIG_RATE_CTRL
- (void)ref_frame_flags;
- (void)bit_depth;
- (void)input_bit_depth;
- (void)source_frame;
- (void)coded_frame_buf;
- (void)ref_frame_bufs;
- (void)counts;
-#endif // CONFIG_RATE_CTRL
- encode_frame_result->show_idx = coded_frame_buf->frame_index;
- encode_frame_result->update_type = update_type;
- encode_frame_result->quantize_index = quantize_index;
-}
-#endif // !CONFIG_REALTIME_ONLY
-
void vp9_init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
encode_frame_result->show_idx = -1; // Actual encoding doesn't happen.
#if CONFIG_RATE_CTRL
@@ -7861,9 +7804,12 @@
cm->new_fb_idx = get_free_fb(cm);
if (cm->new_fb_idx == INVALID_IDX) return -1;
-
cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx];
-
+ // If the frame buffer for current frame is the same as previous frame, MV in
+ // the base layer shouldn't be used as it'll cause data race.
+ if (cpi->svc.spatial_layer_id > 0 && cm->cur_frame == cm->prev_frame) {
+ cpi->svc.use_base_mv = 0;
+ }
// Start with a 0 size frame.
*size = 0;
--- a/vp9/encoder/vp9_encoder.h
+++ b/vp9/encoder/vp9_encoder.h
@@ -273,10 +273,6 @@
vpx_fixed_buf_t two_pass_stats_in;
-#if CONFIG_FP_MB_STATS
- vpx_fixed_buf_t firstpass_mb_stats_in;
-#endif
-
vp8e_tuning tuning;
vp9e_tune_content content;
#if CONFIG_VP9_HIGHBITDEPTH
@@ -291,6 +287,7 @@
int row_mt;
unsigned int motion_vector_unit_test;
int delta_q_uv;
+ int use_simple_encode_api; // Use SimpleEncode APIs or not
} VP9EncoderConfig;
static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
@@ -710,9 +707,6 @@
TileDataEnc *tile_data;
int allocated_tiles; // Keep track of memory allocated for tiles.
- // For a still frame, this flag is set to 1 to skip partition search.
- int partition_search_skippable_frame;
-
int scaled_ref_idx[REFS_PER_FRAME];
int lst_fb_idx;
int gld_fb_idx;
@@ -746,6 +740,7 @@
// Ambient reconstruction err target for force key frames
int64_t ambient_err;
+ RD_CONTROL rd_ctrl;
RD_OPT rd;
CODING_CONTEXT coding_context;
@@ -804,10 +799,6 @@
uint64_t time_pick_lpf;
uint64_t time_encode_sb_row;
-#if CONFIG_FP_MB_STATS
- int use_fp_mb_stats;
-#endif
-
TWO_PASS twopass;
// Force recalculation of segment_ids for each mode info
@@ -958,6 +949,8 @@
uint8_t *content_state_sb_fd;
int compute_source_sad_onepass;
+
+ int compute_frame_low_motion_onepass;
LevelConstraint level_constraint;
--- a/vp9/encoder/vp9_ext_ratectrl.c
+++ b/vp9/encoder/vp9_ext_ratectrl.c
@@ -13,31 +13,56 @@
#include "vp9/common/vp9_common.h"
#include "vpx_dsp/psnr.h"
-void vp9_extrc_init(EXT_RATECTRL *ext_ratectrl) { vp9_zero(*ext_ratectrl); }
+vpx_codec_err_t vp9_extrc_init(EXT_RATECTRL *ext_ratectrl) {
+ if (ext_ratectrl == NULL) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
+ vp9_zero(*ext_ratectrl);
+ return VPX_CODEC_OK;
+}
-void vp9_extrc_create(vpx_rc_funcs_t funcs, vpx_rc_config_t ratectrl_config,
- EXT_RATECTRL *ext_ratectrl) {
+vpx_codec_err_t vp9_extrc_create(vpx_rc_funcs_t funcs,
+ vpx_rc_config_t ratectrl_config,
+ EXT_RATECTRL *ext_ratectrl) {
+ vpx_rc_status_t rc_status;
vpx_rc_firstpass_stats_t *rc_firstpass_stats;
+ if (ext_ratectrl == NULL) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
vp9_extrc_delete(ext_ratectrl);
ext_ratectrl->funcs = funcs;
ext_ratectrl->ratectrl_config = ratectrl_config;
- ext_ratectrl->funcs.create_model(ext_ratectrl->funcs.priv,
- &ext_ratectrl->ratectrl_config,
- &ext_ratectrl->model);
+ rc_status = ext_ratectrl->funcs.create_model(ext_ratectrl->funcs.priv,
+ &ext_ratectrl->ratectrl_config,
+ &ext_ratectrl->model);
+ if (rc_status == VPX_RC_ERROR) {
+ return VPX_CODEC_ERROR;
+ }
rc_firstpass_stats = &ext_ratectrl->rc_firstpass_stats;
rc_firstpass_stats->num_frames = ratectrl_config.show_frame_count;
rc_firstpass_stats->frame_stats =
vpx_malloc(sizeof(*rc_firstpass_stats->frame_stats) *
rc_firstpass_stats->num_frames);
+ if (rc_firstpass_stats->frame_stats == NULL) {
+ return VPX_CODEC_MEM_ERROR;
+ }
ext_ratectrl->ready = 1;
+ return VPX_CODEC_OK;
}
-void vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl) {
+vpx_codec_err_t vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl) {
+ if (ext_ratectrl == NULL) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
if (ext_ratectrl->ready) {
- ext_ratectrl->funcs.delete_model(ext_ratectrl->model);
+ vpx_rc_status_t rc_status =
+ ext_ratectrl->funcs.delete_model(ext_ratectrl->model);
+ if (rc_status == VPX_RC_ERROR) {
+ return VPX_CODEC_ERROR;
+ }
vpx_free(ext_ratectrl->rc_firstpass_stats.frame_stats);
}
- vp9_extrc_init(ext_ratectrl);
+ return vp9_extrc_init(ext_ratectrl);
}
static void gen_rc_firstpass_stats(const FIRSTPASS_STATS *stats,
@@ -69,9 +94,13 @@
rc_frame_stats->count = stats->count;
}
-void vp9_extrc_send_firstpass_stats(EXT_RATECTRL *ext_ratectrl,
- const FIRST_PASS_INFO *first_pass_info) {
+vpx_codec_err_t vp9_extrc_send_firstpass_stats(
+ EXT_RATECTRL *ext_ratectrl, const FIRST_PASS_INFO *first_pass_info) {
+ if (ext_ratectrl == NULL) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
if (ext_ratectrl->ready) {
+ vpx_rc_status_t rc_status;
vpx_rc_firstpass_stats_t *rc_firstpass_stats =
&ext_ratectrl->rc_firstpass_stats;
int i;
@@ -80,9 +109,13 @@
gen_rc_firstpass_stats(&first_pass_info->stats[i],
&rc_firstpass_stats->frame_stats[i]);
}
- ext_ratectrl->funcs.send_firstpass_stats(ext_ratectrl->model,
- rc_firstpass_stats);
+ rc_status = ext_ratectrl->funcs.send_firstpass_stats(ext_ratectrl->model,
+ rc_firstpass_stats);
+ if (rc_status == VPX_RC_ERROR) {
+ return VPX_CODEC_ERROR;
+ }
}
+ return VPX_CODEC_OK;
}
static int extrc_get_frame_type(FRAME_UPDATE_TYPE update_type) {
@@ -102,12 +135,16 @@
}
}
-void vp9_extrc_get_encodeframe_decision(
+vpx_codec_err_t vp9_extrc_get_encodeframe_decision(
EXT_RATECTRL *ext_ratectrl, int show_index, int coding_index, int gop_index,
FRAME_UPDATE_TYPE update_type,
RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int ref_frame_flags,
vpx_rc_encodeframe_decision_t *encode_frame_decision) {
+ if (ext_ratectrl == NULL) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
if (ext_ratectrl->ready) {
+ vpx_rc_status_t rc_status;
vpx_rc_encodeframe_info_t encode_frame_info;
encode_frame_info.show_index = show_index;
encode_frame_info.coding_index = coding_index;
@@ -118,24 +155,32 @@
encode_frame_info.ref_frame_coding_indexes,
encode_frame_info.ref_frame_valid_list);
- ext_ratectrl->funcs.get_encodeframe_decision(
+ rc_status = ext_ratectrl->funcs.get_encodeframe_decision(
ext_ratectrl->model, &encode_frame_info, encode_frame_decision);
+ if (rc_status == VPX_RC_ERROR) {
+ return VPX_CODEC_ERROR;
+ }
}
+ return VPX_CODEC_OK;
}
-void vp9_extrc_update_encodeframe_result(EXT_RATECTRL *ext_ratectrl,
- int64_t bit_count,
- const YV12_BUFFER_CONFIG *source_frame,
- const YV12_BUFFER_CONFIG *coded_frame,
- uint32_t bit_depth,
- uint32_t input_bit_depth) {
+vpx_codec_err_t vp9_extrc_update_encodeframe_result(
+ EXT_RATECTRL *ext_ratectrl, int64_t bit_count,
+ const YV12_BUFFER_CONFIG *source_frame,
+ const YV12_BUFFER_CONFIG *coded_frame, uint32_t bit_depth,
+ uint32_t input_bit_depth, const int actual_encoding_qindex) {
+ if (ext_ratectrl == NULL) {
+ return VPX_CODEC_INVALID_PARAM;
+ }
if (ext_ratectrl->ready) {
PSNR_STATS psnr;
+ vpx_rc_status_t rc_status;
vpx_rc_encodeframe_result_t encode_frame_result;
encode_frame_result.bit_count = bit_count;
encode_frame_result.pixel_count =
- source_frame->y_width * source_frame->y_height +
- 2 * source_frame->uv_width * source_frame->uv_height;
+ source_frame->y_crop_width * source_frame->y_crop_height +
+ 2 * source_frame->uv_crop_width * source_frame->uv_crop_height;
+ encode_frame_result.actual_encoding_qindex = actual_encoding_qindex;
#if CONFIG_VP9_HIGHBITDEPTH
vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
input_bit_depth);
@@ -145,7 +190,11 @@
vpx_calc_psnr(source_frame, coded_frame, &psnr);
#endif
encode_frame_result.sse = psnr.sse[0];
- ext_ratectrl->funcs.update_encodeframe_result(ext_ratectrl->model,
- &encode_frame_result);
+ rc_status = ext_ratectrl->funcs.update_encodeframe_result(
+ ext_ratectrl->model, &encode_frame_result);
+ if (rc_status == VPX_RC_ERROR) {
+ return VPX_CODEC_ERROR;
+ }
}
+ return VPX_CODEC_OK;
}
--- a/vp9/encoder/vp9_ext_ratectrl.h
+++ b/vp9/encoder/vp9_ext_ratectrl.h
@@ -22,27 +22,27 @@
vpx_rc_firstpass_stats_t rc_firstpass_stats;
} EXT_RATECTRL;
-void vp9_extrc_init(EXT_RATECTRL *ext_ratectrl);
+vpx_codec_err_t vp9_extrc_init(EXT_RATECTRL *ext_ratectrl);
-void vp9_extrc_create(vpx_rc_funcs_t funcs, vpx_rc_config_t ratectrl_config,
- EXT_RATECTRL *ext_ratectrl);
+vpx_codec_err_t vp9_extrc_create(vpx_rc_funcs_t funcs,
+ vpx_rc_config_t ratectrl_config,
+ EXT_RATECTRL *ext_ratectrl);
-void vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl);
+vpx_codec_err_t vp9_extrc_delete(EXT_RATECTRL *ext_ratectrl);
-void vp9_extrc_send_firstpass_stats(EXT_RATECTRL *ext_ratectrl,
- const FIRST_PASS_INFO *first_pass_info);
+vpx_codec_err_t vp9_extrc_send_firstpass_stats(
+ EXT_RATECTRL *ext_ratectrl, const FIRST_PASS_INFO *first_pass_info);
-void vp9_extrc_get_encodeframe_decision(
+vpx_codec_err_t vp9_extrc_get_encodeframe_decision(
EXT_RATECTRL *ext_ratectrl, int show_index, int coding_index, int gop_index,
FRAME_UPDATE_TYPE update_type,
RefCntBuffer *ref_frame_bufs[MAX_INTER_REF_FRAMES], int ref_frame_flags,
vpx_rc_encodeframe_decision_t *encode_frame_decision);
-void vp9_extrc_update_encodeframe_result(EXT_RATECTRL *ext_ratectrl,
- int64_t bit_count,
- const YV12_BUFFER_CONFIG *source_frame,
- const YV12_BUFFER_CONFIG *coded_frame,
- uint32_t bit_depth,
- uint32_t input_bit_depth);
+vpx_codec_err_t vp9_extrc_update_encodeframe_result(
+ EXT_RATECTRL *ext_ratectrl, int64_t bit_count,
+ const YV12_BUFFER_CONFIG *source_frame,
+ const YV12_BUFFER_CONFIG *coded_frame, uint32_t bit_depth,
+ uint32_t input_bit_depth, const int actual_encoding_qindex);
#endif // VPX_VP9_ENCODER_VP9_EXT_RATECTRL_H_
--- a/vp9/encoder/vp9_firstpass.c
+++ b/vp9/encoder/vp9_firstpass.c
@@ -54,6 +54,30 @@
#define NCOUNT_INTRA_THRESH 8192
#define NCOUNT_INTRA_FACTOR 3
+#define INTRA_PART 0.005
+#define DEFAULT_DECAY_LIMIT 0.75
+#define LOW_SR_DIFF_TRHESH 0.1
+#define LOW_CODED_ERR_PER_MB 10.0
+#define NCOUNT_FRAME_II_THRESH 6.0
+#define BASELINE_ERR_PER_MB 12500.0
+#define GF_MAX_FRAME_BOOST 96.0
+
+#ifdef AGGRESSIVE_VBR
+#define KF_MIN_FRAME_BOOST 40.0
+#define KF_MAX_FRAME_BOOST 80.0
+#define MAX_KF_TOT_BOOST 4800
+#else
+#define KF_MIN_FRAME_BOOST 40.0
+#define KF_MAX_FRAME_BOOST 96.0
+#define MAX_KF_TOT_BOOST 5400
+#endif
+
+#define DEFAULT_ZM_FACTOR 0.5
+#define MINQ_ADJ_LIMIT 48
+#define MINQ_ADJ_LIMIT_CQ 20
+#define HIGH_UNDERSHOOT_RATIO 2
+#define AV_WQ_FACTOR 4.0
+
#define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001)
#if ARF_STATS_OUTPUT
@@ -111,17 +135,6 @@
#endif
}
-#if CONFIG_FP_MB_STATS
-static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm,
- struct vpx_codec_pkt_list *pktlist) {
- struct vpx_codec_cx_pkt pkt;
- pkt.kind = VPX_CODEC_FPMB_STATS_PKT;
- pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats;
- pkt.data.firstpass_mb_stats.sz = cm->initial_mbs * sizeof(uint8_t);
- vpx_codec_pkt_list_add(pktlist, &pkt);
-}
-#endif
-
static void zero_stats(FIRSTPASS_STATS *section) {
section->frame = 0.0;
section->weight = 0.0;
@@ -929,10 +942,6 @@
int level_sample;
const int mb_index = mb_row * cm->mb_cols + mb_col;
-#if CONFIG_FP_MB_STATS
- const int mb_index = mb_row * cm->mb_cols + mb_col;
-#endif
-
(*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c);
// Adjust to the next column of MBs.
@@ -1068,13 +1077,6 @@
// Accumulate the intra error.
fp_acc_data->intra_error += (int64_t)this_error;
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- // initialization
- cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
- }
-#endif
-
// Set up limit values for motion vectors to prevent them extending
// outside the UMV borders.
x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
@@ -1081,8 +1083,8 @@
x->mv_limits.col_max =
((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;
- // Other than for the first frame do a motion search.
- if (cm->current_video_frame > 0) {
+ // Other than for intra-only frame do a motion search.
+ if (!frame_is_intra_only(cm)) {
int tmp_err, motion_error, this_motion_error, raw_motion_error;
// Assume 0,0 motion with no mv overhead.
MV mv = { 0, 0 }, tmp_mv = { 0, 0 };
@@ -1090,8 +1092,10 @@
vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize];
#if CONFIG_RATE_CTRL
- // Store zero mv as default
- store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);
+ if (cpi->oxcf.use_simple_encode_api) {
+ // Store zero mv as default
+ store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);
+ }
#endif // CONFIG_RAGE_CTRL
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
@@ -1159,7 +1163,9 @@
}
}
#if CONFIG_RATE_CTRL
- store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);
+ if (cpi->oxcf.use_simple_encode_api) {
+ store_fp_motion_vector(cpi, &mv, mb_row, mb_col, LAST_FRAME, 0);
+ }
#endif // CONFIG_RAGE_CTRL
// Search in an older reference frame.
@@ -1183,7 +1189,10 @@
first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error);
#if CONFIG_RATE_CTRL
- store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col, GOLDEN_FRAME, 1);
+ if (cpi->oxcf.use_simple_encode_api) {
+ store_fp_motion_vector(cpi, &tmp_mv, mb_row, mb_col, GOLDEN_FRAME,
+ 1);
+ }
#endif // CONFIG_RAGE_CTRL
if (gf_motion_error < motion_error && gf_motion_error < this_error)
@@ -1213,20 +1222,6 @@
best_ref_mv->row = 0;
best_ref_mv->col = 0;
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- // intra prediction statistics
- cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK;
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
- if (this_error > FPMB_ERROR_LARGE_TH) {
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;
- } else if (this_error < FPMB_ERROR_SMALL_TH) {
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;
- }
- }
-#endif
-
if (motion_error <= this_error) {
vpx_clear_system_state();
@@ -1271,47 +1266,9 @@
*best_ref_mv = mv;
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- // inter prediction statistics
- cpi->twopass.frame_mb_stats_buf[mb_index] = 0;
- cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK;
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK;
- if (this_error > FPMB_ERROR_LARGE_TH) {
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK;
- } else if (this_error < FPMB_ERROR_SMALL_TH) {
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK;
- }
- }
-#endif
-
if (!is_zero_mv(&mv)) {
++(fp_acc_data->mvcount);
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_MOTION_ZERO_MASK;
- // check estimated motion direction
- if (mv.as_mv.col > 0 && mv.as_mv.col >= abs(mv.as_mv.row)) {
- // right direction
- cpi->twopass.frame_mb_stats_buf[mb_index] |=
- FPMB_MOTION_RIGHT_MASK;
- } else if (mv.as_mv.row < 0 &&
- abs(mv.as_mv.row) >= abs(mv.as_mv.col)) {
- // up direction
- cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_UP_MASK;
- } else if (mv.as_mv.col < 0 &&
- abs(mv.as_mv.col) >= abs(mv.as_mv.row)) {
- // left direction
- cpi->twopass.frame_mb_stats_buf[mb_index] |=
- FPMB_MOTION_LEFT_MASK;
- } else {
- // down direction
- cpi->twopass.frame_mb_stats_buf[mb_index] |=
- FPMB_MOTION_DOWN_MASK;
- }
- }
-#endif
// Does the row vector point inwards or outwards?
if (mb_row < cm->mb_rows / 2) {
if (mv.row > 0)
@@ -1359,7 +1316,9 @@
} else {
fp_acc_data->sr_coded_error += (int64_t)this_error;
#if CONFIG_RATE_CTRL
- store_fp_motion_vector(cpi, NULL, mb_row, mb_col, INTRA_FRAME, 0);
+ if (cpi->oxcf.use_simple_encode_api) {
+ store_fp_motion_vector(cpi, NULL, mb_row, mb_col, INTRA_FRAME, 0);
+ }
#endif // CONFIG_RAGE_CTRL
}
fp_acc_data->coded_error += (int64_t)this_error;
@@ -1388,9 +1347,11 @@
vp9_tile_init(tile, cm, 0, 0);
#if CONFIG_RATE_CTRL
- fp_motion_vector_info_reset(cpi->frame_info.frame_width,
- cpi->frame_info.frame_height,
- cpi->fp_motion_vector_info);
+ if (cpi->oxcf.use_simple_encode_api) {
+ fp_motion_vector_info_reset(cpi->frame_info.frame_width,
+ cpi->frame_info.frame_height,
+ cpi->fp_motion_vector_info);
+ }
#endif
for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
@@ -1424,12 +1385,6 @@
assert(new_yv12 != NULL);
assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cm->initial_mbs);
- }
-#endif
-
set_first_pass_params(cpi);
vp9_set_quantizer(cpi, find_fp_qindex(cm->bit_depth));
@@ -1490,12 +1445,6 @@
twopass->this_frame_stats = fps;
output_stats(&twopass->this_frame_stats);
accumulate_stats(&twopass->total_stats, &fps);
-
-#if CONFIG_FP_MB_STATS
- if (cpi->use_fp_mb_stats) {
- output_fpmb_stats(twopass->frame_mb_stats_buf, cm, cpi->output_pkt_list);
- }
-#endif
}
// Copy the previous Last Frame back into gf and and arf buffers if
@@ -1807,61 +1756,60 @@
twopass->arnr_strength_adjustment = 0;
}
-#define SR_DIFF_PART 0.0015
-#define INTRA_PART 0.005
-#define DEFAULT_DECAY_LIMIT 0.75
-#define LOW_SR_DIFF_TRHESH 0.1
-#define SR_DIFF_MAX 128.0
-#define LOW_CODED_ERR_PER_MB 10.0
-#define NCOUNT_FRAME_II_THRESH 6.0
-
-static double get_sr_decay_rate(const FRAME_INFO *frame_info,
+/* This function considers how the quality of prediction may be deteriorating
+ * with distance. It compares the coded error for the last frame and the
+ * second reference frame (usually two frames old) and also applies a factor
+ * based on the extent of INTRA coding.
+ *
+ * The decay factor is then used to reduce the contribution of frames further
+ * from the alt-ref or golden frame, to the bitrate boost calculation for that
+ * alt-ref or golden frame.
+ */
+static double get_sr_decay_rate(const TWO_PASS *const twopass,
const FIRSTPASS_STATS *frame) {
double sr_diff = (frame->sr_coded_error - frame->coded_error);
double sr_decay = 1.0;
- double modified_pct_inter;
- double modified_pcnt_intra;
- const double motion_amplitude_part =
- frame->pcnt_motion *
- ((frame->mvc_abs + frame->mvr_abs) /
- (frame_info->frame_height + frame_info->frame_width));
- modified_pct_inter = frame->pcnt_inter;
- if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&
- ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
- (double)NCOUNT_FRAME_II_THRESH)) {
- modified_pct_inter =
- frame->pcnt_inter + frame->pcnt_intra_low - frame->pcnt_neutral;
- }
- modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);
-
+ // Do nothing if the second ref to last frame error difference is
+ // very small or even negative.
if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
- sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX);
- sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - motion_amplitude_part -
- (INTRA_PART * modified_pcnt_intra);
+ const double sr_diff_part =
+ twopass->sr_diff_factor * ((sr_diff * 0.25) / frame->intra_error);
+ double modified_pct_inter = frame->pcnt_inter;
+ double modified_pcnt_intra;
+
+ if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&
+ ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
+ (double)NCOUNT_FRAME_II_THRESH)) {
+ modified_pct_inter =
+ frame->pcnt_inter + frame->pcnt_intra_low - frame->pcnt_neutral;
+ }
+ modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);
+
+ sr_decay = 1.0 - sr_diff_part - (INTRA_PART * modified_pcnt_intra);
}
- return VPXMAX(sr_decay, DEFAULT_DECAY_LIMIT);
+ return VPXMAX(sr_decay, twopass->sr_default_decay_limit);
}
// This function gives an estimate of how badly we believe the prediction
// quality is decaying from frame to frame.
-static double get_zero_motion_factor(const FRAME_INFO *frame_info,
+static double get_zero_motion_factor(const TWO_PASS *const twopass,
const FIRSTPASS_STATS *frame_stats) {
const double zero_motion_pct =
frame_stats->pcnt_inter - frame_stats->pcnt_motion;
- double sr_decay = get_sr_decay_rate(frame_info, frame_stats);
+ double sr_decay = get_sr_decay_rate(twopass, frame_stats);
return VPXMIN(sr_decay, zero_motion_pct);
}
-#define ZM_POWER_FACTOR 0.75
-
-static double get_prediction_decay_rate(const FRAME_INFO *frame_info,
+static double get_prediction_decay_rate(const TWO_PASS *const twopass,
const FIRSTPASS_STATS *frame_stats) {
- const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats);
- const double zero_motion_factor =
- (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion),
- ZM_POWER_FACTOR));
+ const double sr_decay_rate = get_sr_decay_rate(twopass, frame_stats);
+ double zero_motion_factor =
+ twopass->zm_factor * (frame_stats->pcnt_inter - frame_stats->pcnt_motion);
+ // Check that the zero motion factor is valid
+ assert(zero_motion_factor >= 0.0 && zero_motion_factor <= 1.0);
+
return VPXMAX(zero_motion_factor,
(sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
}
@@ -1942,10 +1890,9 @@
}
}
-#define BASELINE_ERR_PER_MB 12500.0
-#define GF_MAX_BOOST 96.0
static double calc_frame_boost(const FRAME_INFO *frame_info,
const FIRSTPASS_STATS *this_frame,
+ const TWO_PASS *const twopass,
int avg_frame_qindex,
double this_frame_mv_in_out) {
double frame_boost;
@@ -1954,8 +1901,8 @@
const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5);
const double active_area = calculate_active_area(frame_info, this_frame);
- // Underlying boost factor is based on inter error ratio.
- frame_boost = (BASELINE_ERR_PER_MB * active_area) /
+ // Frame boost is based on inter error.
+ frame_boost = (twopass->err_per_mb * active_area) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
// Small adjustment for cases where there is a zoom out
@@ -1965,28 +1912,15 @@
// Q correction and scalling
frame_boost = frame_boost * boost_q_correction;
- return VPXMIN(frame_boost, GF_MAX_BOOST * boost_q_correction);
+ return VPXMIN(frame_boost, twopass->gf_frame_max_boost * boost_q_correction);
}
-static double kf_err_per_mb(VP9_COMP *cpi) {
- const VP9_COMMON *const cm = &cpi->common;
- unsigned int screen_area = (cm->width * cm->height);
-
- // Use a different error per mb factor for calculating boost for
- // different formats.
- if (screen_area < 1280 * 720) {
- return 2000.0;
- } else if (screen_area < 1920 * 1080) {
- return 500.0;
- }
- return 250.0;
-}
-
static double calc_kf_frame_boost(VP9_COMP *cpi,
const FIRSTPASS_STATS *this_frame,
double *sr_accumulator,
double this_frame_mv_in_out,
- double max_boost) {
+ double zm_factor) {
+ TWO_PASS *const twopass = &cpi->twopass;
double frame_boost;
const double lq = vp9_convert_qindex_to_q(
cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
@@ -1993,9 +1927,10 @@
const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00);
const double active_area =
calculate_active_area(&cpi->frame_info, this_frame);
+ double max_boost;
- // Underlying boost factor is based on inter error ratio.
- frame_boost = (kf_err_per_mb(cpi) * active_area) /
+ // Frame boost is based on inter error.
+ frame_boost = (twopass->kf_err_per_mb * active_area) /
DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator);
// Update the accumulator for second ref error difference.
@@ -2012,15 +1947,23 @@
// The 40.0 value here is an experimentally derived baseline minimum.
// This value is in line with the minimum per frame boost in the alt_ref
// boost calculation.
- frame_boost = ((frame_boost + 40.0) * boost_q_correction);
+ frame_boost =
+ (frame_boost + twopass->kf_frame_min_boost) * boost_q_correction;
- return VPXMIN(frame_boost, max_boost * boost_q_correction);
+ // Maximum allowed boost this frame. May be different for first vs subsequent
+ // key frames.
+ max_boost = (cpi->common.current_video_frame == 0)
+ ? twopass->kf_frame_max_boost_first
+ : twopass->kf_frame_max_boost_subs;
+ max_boost *= zm_factor * boost_q_correction;
+
+ return VPXMIN(frame_boost, max_boost);
}
static int compute_arf_boost(const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
- int arf_show_idx, int f_frames, int b_frames,
- int avg_frame_qindex) {
+ TWO_PASS *const twopass, int arf_show_idx,
+ int f_frames, int b_frames, int avg_frame_qindex) {
+ const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
int i;
double boost_score = 0.0;
double mv_ratio_accumulator = 0.0;
@@ -2051,14 +1994,14 @@
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
+ decay_accumulator *= get_prediction_decay_rate(twopass, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR
: decay_accumulator;
}
- boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,
- avg_frame_qindex,
- this_frame_mv_in_out);
+ boost_score += decay_accumulator *
+ calc_frame_boost(frame_info, this_frame, twopass,
+ avg_frame_qindex, this_frame_mv_in_out);
}
arf_boost = (int)boost_score;
@@ -2091,14 +2034,14 @@
// Cumulative effect of prediction quality decay.
if (!flash_detected) {
- decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
+ decay_accumulator *= get_prediction_decay_rate(twopass, this_frame);
decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
? MIN_DECAY_FACTOR
: decay_accumulator;
}
- boost_score += decay_accumulator * calc_frame_boost(frame_info, this_frame,
- avg_frame_qindex,
- this_frame_mv_in_out);
+ boost_score += decay_accumulator *
+ calc_frame_boost(frame_info, this_frame, twopass,
+ avg_frame_qindex, this_frame_mv_in_out);
}
arf_boost += (int)boost_score;
@@ -2114,8 +2057,8 @@
TWO_PASS *const twopass = &cpi->twopass;
const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME];
int arf_show_idx = get_show_idx(twopass);
- return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx,
- f_frames, b_frames, avg_inter_frame_qindex);
+ return compute_arf_boost(frame_info, twopass, arf_show_idx, f_frames,
+ b_frames, avg_inter_frame_qindex);
}
// Calculate a section intra ratio used in setting max loop filter.
@@ -2530,6 +2473,9 @@
* (The following fields will remain unchanged after initialization of encoder.)
* rc->static_scene_max_gf_interval
* rc->min_gf_interval
+ * twopass->sr_diff_factor
+ * twopass->sr_default_decay_limit
+ * twopass->zm_factor
*
* Dynamic fields:
* (The following fields will be updated before or after coding each frame.)
@@ -2545,9 +2491,10 @@
*/
static int get_gop_coding_frame_num(
int *use_alt_ref, const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc,
+ const TWO_PASS *const twopass, const RATE_CONTROL *rc,
int gf_start_show_idx, const RANGE *active_gf_interval,
double gop_intra_factor, int lag_in_frames) {
+ const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
double loop_decay_rate = 1.00;
double mv_ratio_accumulator = 0.0;
double this_frame_mv_in_out = 0.0;
@@ -2588,15 +2535,14 @@
// Monitor for static sections.
if ((rc->frames_since_key + gop_coding_frames - 1) > 1) {
- zero_motion_accumulator =
- VPXMIN(zero_motion_accumulator,
- get_zero_motion_factor(frame_info, next_frame));
+ zero_motion_accumulator = VPXMIN(
+ zero_motion_accumulator, get_zero_motion_factor(twopass, next_frame));
}
// Accumulate the effect of prediction quality decay.
if (!flash_detected) {
double last_loop_decay_rate = loop_decay_rate;
- loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);
+ loop_decay_rate = get_prediction_decay_rate(twopass, next_frame);
// Break clause to detect very still sections after motion. For example,
// a static image after a fade or other transition.
@@ -2656,25 +2602,25 @@
return gop_coding_frames;
}
-static RANGE get_active_gf_inverval_range(
- const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,
- int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {
+static RANGE get_active_gf_inverval_range_simple(int min_gf_interval,
+ int arf_active_or_kf,
+ int frames_to_key) {
RANGE active_gf_interval;
-#if CONFIG_RATE_CTRL
- (void)frame_info;
- (void)gf_start_show_idx;
- (void)active_worst_quality;
- (void)last_boosted_qindex;
- active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2;
-
+ active_gf_interval.min = min_gf_interval + arf_active_or_kf + 2;
active_gf_interval.max = 16 + arf_active_or_kf;
- if ((active_gf_interval.max <= rc->frames_to_key) &&
- (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
- active_gf_interval.min = rc->frames_to_key / 2;
- active_gf_interval.max = rc->frames_to_key / 2;
+ if ((active_gf_interval.max <= frames_to_key) &&
+ (active_gf_interval.max >= (frames_to_key - min_gf_interval))) {
+ active_gf_interval.min = frames_to_key / 2;
+ active_gf_interval.max = frames_to_key / 2;
}
-#else
+ return active_gf_interval;
+}
+
+static RANGE get_active_gf_inverval_range(
+ const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,
+ int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {
+ RANGE active_gf_interval;
int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality,
frame_info->bit_depth));
int q_term = (gf_start_show_idx == 0)
@@ -2712,7 +2658,6 @@
}
active_gf_interval.max =
VPXMAX(active_gf_interval.max, active_gf_interval.min);
-#endif
return active_gf_interval;
}
@@ -2773,9 +2718,14 @@
vpx_clear_system_state();
- active_gf_interval = get_active_gf_inverval_range(
- frame_info, rc, arf_active_or_kf, gf_start_show_idx,
- twopass->active_worst_quality, rc->last_boosted_qindex);
+ if (oxcf->use_simple_encode_api) {
+ active_gf_interval = get_active_gf_inverval_range_simple(
+ rc->min_gf_interval, arf_active_or_kf, rc->frames_to_key);
+ } else {
+ active_gf_interval = get_active_gf_inverval_range(
+ frame_info, rc, arf_active_or_kf, gf_start_show_idx,
+ twopass->active_worst_quality, rc->last_boosted_qindex);
+ }
if (cpi->multi_layer_arf) {
int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf,
@@ -2785,25 +2735,21 @@
gop_intra_factor = 1.0;
}
+ gop_coding_frames = get_gop_coding_frame_num(
+ &use_alt_ref, frame_info, twopass, rc, gf_start_show_idx,
+ &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);
+ use_alt_ref &= allow_alt_ref;
#if CONFIG_RATE_CTRL
- {
+ // If the external gop_command is on, we will override the decisions
+ // of gop_coding_frames and use_alt_ref.
+ if (cpi->oxcf.use_simple_encode_api) {
const GOP_COMMAND *gop_command = &cpi->encode_command.gop_command;
assert(allow_alt_ref == 1);
if (gop_command->use) {
gop_coding_frames = gop_command_coding_frame_count(gop_command);
use_alt_ref = gop_command->use_alt_ref;
- } else {
- gop_coding_frames = get_gop_coding_frame_num(
- &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,
- &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);
- use_alt_ref &= allow_alt_ref;
}
}
-#else
- gop_coding_frames = get_gop_coding_frame_num(
- &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx,
- &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames);
- use_alt_ref &= allow_alt_ref;
#endif
// Was the group length constrained by the requirement for a new KF?
@@ -2823,8 +2769,8 @@
// Calculate the boost for alt ref.
rc->gfu_boost =
- compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames,
- b_frames, avg_inter_frame_qindex);
+ compute_arf_boost(frame_info, twopass, arf_show_idx, f_frames, b_frames,
+ avg_inter_frame_qindex);
rc->source_alt_ref_pending = 1;
} else {
const int f_frames = gop_coding_frames - 1;
@@ -2834,9 +2780,9 @@
const int gld_show_idx =
VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info));
const int arf_boost =
- compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames,
- b_frames, avg_inter_frame_qindex);
- rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost);
+ compute_arf_boost(frame_info, twopass, gld_show_idx, f_frames, b_frames,
+ avg_inter_frame_qindex);
+ rc->gfu_boost = VPXMIN((int)twopass->gf_max_total_boost, arf_boost);
rc->source_alt_ref_pending = 0;
}
@@ -2939,7 +2885,9 @@
cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone),
group_av_noise, vbr_group_bits_per_frame);
twopass->active_worst_quality =
- (tmp_q + (twopass->active_worst_quality * 3)) >> 2;
+ (int)((tmp_q + (twopass->active_worst_quality *
+ (twopass->active_wq_factor - 1))) /
+ twopass->active_wq_factor);
#if CONFIG_ALWAYS_ADJUST_BPM
// Reset rolling actual and target bits counters for ARF groups.
@@ -3159,18 +3107,10 @@
#define MIN_SCAN_FRAMES_FOR_KF_BOOST 32
#define KF_ABS_ZOOM_THRESH 6.0
-#ifdef AGGRESSIVE_VBR
-#define KF_MAX_FRAME_BOOST 80.0
-#define MAX_KF_TOT_BOOST 4800
-#else
-#define KF_MAX_FRAME_BOOST 96.0
-#define MAX_KF_TOT_BOOST 5400
-#endif
-
int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf,
- const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
- int kf_show_idx, int min_gf_interval) {
+ const TWO_PASS *const twopass, int kf_show_idx,
+ int min_gf_interval) {
+ const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
double recent_loop_decay[FRAMES_TO_CHECK_DECAY];
int j;
int frames_to_key;
@@ -3197,7 +3137,7 @@
break;
// How fast is the prediction quality decaying?
- loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame);
+ loop_decay_rate = get_prediction_decay_rate(twopass, next_frame);
// We want to know something about the recent past... rather than
// as used elsewhere where we are concerned with decay in prediction
@@ -3283,8 +3223,8 @@
kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats,
mean_mod_score, av_err);
- rc->frames_to_key = vp9_get_frames_to_next_key(
- oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval);
+ rc->frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, kf_show_idx,
+ rc->min_gf_interval);
// If there is a max kf interval set by the user we must obey it.
// We already breakout of the loop above at 2x max.
@@ -3366,7 +3306,7 @@
if (i > 0) {
zero_motion_accumulator =
VPXMIN(zero_motion_accumulator,
- get_zero_motion_factor(&cpi->frame_info, &next_frame));
+ get_zero_motion_factor(twopass, &next_frame));
} else {
zero_motion_accumulator =
next_frame.pcnt_inter - next_frame.pcnt_motion;
@@ -3380,8 +3320,8 @@
// the first key frame or it points to a refernce before the new key
// frame.
if (i < 2) sr_accumulator = 0.0;
- frame_boost = calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0,
- KF_MAX_FRAME_BOOST * zm_factor);
+ frame_boost =
+ calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0, zm_factor);
boost_score += frame_boost;
@@ -3410,12 +3350,12 @@
// Special case for static / slide show content but dont apply
// if the kf group is very short.
if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) {
- rc->kf_boost = MAX_KF_TOT_BOOST;
+ rc->kf_boost = (int)(twopass->kf_max_total_boost);
} else {
- // Apply various clamps for min and max boost
+ // Apply various clamps for min and max boost
rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3));
rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST);
- rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST);
+ rc->kf_boost = VPXMIN(rc->kf_boost, (int)(twopass->kf_max_total_boost));
}
// Work out how many bits to allocate for the key frame itself.
@@ -3451,23 +3391,66 @@
}
}
-static int is_skippable_frame(const VP9_COMP *cpi) {
- // If the current frame does not have non-zero motion vector detected in the
- // first pass, and so do its previous and forward frames, then this frame
- // can be skipped for partition check, and the partition size is assigned
- // according to the variance
- const TWO_PASS *const twopass = &cpi->twopass;
+// Configure image size specific vizier parameters.
+// Later these will be set via additional command line options
+void vp9_init_vizier_params(TWO_PASS *const twopass, int screen_area) {
+ // When |use_vizier_rc_params| is 1, we expect the rc parameters below to
+ // have been initialised on the command line as adjustment factors such
+ // that a factor of 1.0 will match the default behavior when
+ // |use_vizier_rc_params| is 0
+ if (twopass->use_vizier_rc_params) {
+ twopass->active_wq_factor *= AV_WQ_FACTOR;
+ twopass->err_per_mb *= BASELINE_ERR_PER_MB;
+ twopass->sr_default_decay_limit *= DEFAULT_DECAY_LIMIT;
+ if (twopass->sr_default_decay_limit > 1.0) // > 1.0 here makes no sense
+ twopass->sr_default_decay_limit = 1.0;
+ twopass->sr_diff_factor *= 1.0;
+ twopass->gf_frame_max_boost *= GF_MAX_FRAME_BOOST;
+ twopass->gf_max_total_boost *= MAX_GF_BOOST;
+ // NOTE: In use max boost has precedence over min boost. So even if min is
+ // somehow set higher than max the final boost value will be clamped to the
+ // appropriate maximum.
+ twopass->kf_frame_min_boost *= KF_MIN_FRAME_BOOST;
+ twopass->kf_frame_max_boost_first *= KF_MAX_FRAME_BOOST;
+ twopass->kf_frame_max_boost_subs *= KF_MAX_FRAME_BOOST;
+ twopass->kf_max_total_boost *= MAX_KF_TOT_BOOST;
+ twopass->zm_factor *= DEFAULT_ZM_FACTOR;
+ if (twopass->zm_factor > 1.0) // > 1.0 here makes no sense
+ twopass->zm_factor = 1.0;
- return (!frame_is_intra_only(&cpi->common) &&
- twopass->stats_in - 2 > twopass->stats_in_start &&
- twopass->stats_in < twopass->stats_in_end &&
- (twopass->stats_in - 1)->pcnt_inter -
- (twopass->stats_in - 1)->pcnt_motion ==
- 1 &&
- (twopass->stats_in - 2)->pcnt_inter -
- (twopass->stats_in - 2)->pcnt_motion ==
- 1 &&
- twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1);
+ // Correction for the fact that the kf_err_per_mb_factor default is
+ // already different for different video formats; it ensures that a passed
+ // in value of 1.0 on the vizier command line will still match the current
+ // default.
+ if (screen_area < 1280 * 720) {
+ twopass->kf_err_per_mb *= 2000.0;
+ } else if (screen_area < 1920 * 1080) {
+ twopass->kf_err_per_mb *= 500.0;
+ } else {
+ twopass->kf_err_per_mb *= 250.0;
+ }
+ } else {
+ // When |use_vizier_rc_params| is 0, use defaults.
+ twopass->active_wq_factor = AV_WQ_FACTOR;
+ twopass->err_per_mb = BASELINE_ERR_PER_MB;
+ twopass->sr_default_decay_limit = DEFAULT_DECAY_LIMIT;
+ twopass->sr_diff_factor = 1.0;
+ twopass->gf_frame_max_boost = GF_MAX_FRAME_BOOST;
+ twopass->gf_max_total_boost = MAX_GF_BOOST;
+ twopass->kf_frame_min_boost = KF_MIN_FRAME_BOOST;
+ twopass->kf_frame_max_boost_first = KF_MAX_FRAME_BOOST;
+ twopass->kf_frame_max_boost_subs = KF_MAX_FRAME_BOOST;
+ twopass->kf_max_total_boost = MAX_KF_TOT_BOOST;
+ twopass->zm_factor = DEFAULT_ZM_FACTOR;
+
+ if (screen_area < 1280 * 720) {
+ twopass->kf_err_per_mb = 2000.0;
+ } else if (screen_area < 1920 * 1080) {
+ twopass->kf_err_per_mb = 500.0;
+ } else {
+ twopass->kf_err_per_mb = 250.0;
+ }
+ }
}
void vp9_rc_get_second_pass_params(VP9_COMP *cpi) {
@@ -3480,6 +3463,13 @@
if (!twopass->stats_in) return;
+ // Configure image size specific vizier parameters
+ if (cm->current_video_frame == 0) {
+ unsigned int screen_area = (cm->width * cm->height);
+
+ vp9_init_vizier_params(twopass, screen_area);
+ }
+
// If this is an arf frame then we dont want to read the stats file or
// advance the input pointer as we already have what we need.
if (gf_group->update_type[gf_group->index] == ARF_UPDATE) {
@@ -3498,13 +3488,6 @@
cm->frame_type = INTER_FRAME;
- // Do the firstpass stats indicate that this frame is skippable for the
- // partition search?
- if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
- !cpi->use_svc) {
- cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
- }
-
// The multiplication by 256 reverses a scaling factor of (>> 8)
// applied when combining MB error values for the frame.
twopass->mb_av_energy = log((this_frame.intra_error * 256.0) + 1.0);
@@ -3587,13 +3570,6 @@
vp9_configure_buffer_updates(cpi, gf_group->index);
- // Do the firstpass stats indicate that this frame is skippable for the
- // partition search?
- if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 &&
- !cpi->use_svc) {
- cpi->partition_search_skippable_frame = is_skippable_frame(cpi);
- }
-
rc->base_frame_target = gf_group->bit_allocation[gf_group->index];
// The multiplication by 256 reverses a scaling factor of (>> 8)
@@ -3605,9 +3581,6 @@
subtract_stats(&twopass->total_left_stats, &this_frame);
}
-#define MINQ_ADJ_LIMIT 48
-#define MINQ_ADJ_LIMIT_CQ 20
-#define HIGH_UNDERSHOOT_RATIO 2
void vp9_twopass_postencode_update(VP9_COMP *cpi) {
TWO_PASS *const twopass = &cpi->twopass;
RATE_CONTROL *const rc = &cpi->rc;
@@ -3747,8 +3720,7 @@
*first_is_key_frame = 0;
if (rc.frames_to_key == 0) {
rc.frames_to_key = vp9_get_frames_to_next_key(
- &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info,
- *first_show_idx, rc.min_gf_interval);
+ &cpi->oxcf, &cpi->twopass, *first_show_idx, rc.min_gf_interval);
rc.frames_since_key = 0;
*first_is_key_frame = 1;
}
@@ -3756,18 +3728,18 @@
if (gop_command->use) {
*coding_frame_count = gop_command_coding_frame_count(gop_command);
*use_alt_ref = gop_command->use_alt_ref;
- assert(*coding_frame_count < rc.frames_to_key);
+ assert(gop_command->show_frame_count <= rc.frames_to_key);
} else {
*coding_frame_count = vp9_get_gop_coding_frame_count(
- &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info, &rc,
- *first_show_idx, multi_layer_arf, allow_alt_ref, *first_is_key_frame,
+ &cpi->oxcf, &cpi->twopass, &cpi->frame_info, &rc, *first_show_idx,
+ multi_layer_arf, allow_alt_ref, *first_is_key_frame,
*last_gop_use_alt_ref, use_alt_ref);
}
}
int vp9_get_gop_coding_frame_count(const VP9EncoderConfig *oxcf,
+ const TWO_PASS *const twopass,
const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
const RATE_CONTROL *rc, int show_idx,
int multi_layer_arf, int allow_alt_ref,
int first_is_key_frame,
@@ -3775,12 +3747,19 @@
int frame_count;
double gop_intra_factor;
const int arf_active_or_kf = last_gop_use_alt_ref || first_is_key_frame;
- RANGE active_gf_interval = get_active_gf_inverval_range(
- frame_info, rc, arf_active_or_kf, show_idx, /*active_worst_quality=*/0,
- /*last_boosted_qindex=*/0);
+ RANGE active_gf_interval;
+ int arf_layers;
+ if (oxcf->use_simple_encode_api) {
+ active_gf_interval = get_active_gf_inverval_range_simple(
+ rc->min_gf_interval, arf_active_or_kf, rc->frames_to_key);
+ } else {
+ active_gf_interval = get_active_gf_inverval_range(
+ frame_info, rc, arf_active_or_kf, show_idx, /*active_worst_quality=*/0,
+ /*last_boosted_qindex=*/0);
+ }
- const int arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf,
- active_gf_interval.max);
+ arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf,
+ active_gf_interval.max);
if (multi_layer_arf) {
gop_intra_factor = 1.0 + 0.25 * arf_layers;
} else {
@@ -3787,9 +3766,9 @@
gop_intra_factor = 1.0;
}
- frame_count = get_gop_coding_frame_num(
- use_alt_ref, frame_info, first_pass_info, rc, show_idx,
- &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames);
+ frame_count = get_gop_coding_frame_num(use_alt_ref, frame_info, twopass, rc,
+ show_idx, &active_gf_interval,
+ gop_intra_factor, oxcf->lag_in_frames);
*use_alt_ref &= allow_alt_ref;
return frame_count;
}
@@ -3797,9 +3776,10 @@
// Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of
// coding frames (including show frame and alt ref) can be determined.
int vp9_get_coding_frame_num(const VP9EncoderConfig *oxcf,
- const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
- int multi_layer_arf, int allow_alt_ref) {
+ const TWO_PASS *const twopass,
+ const FRAME_INFO *frame_info, int multi_layer_arf,
+ int allow_alt_ref) {
+ const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
int coding_frame_num = 0;
RATE_CONTROL rc;
int gop_coding_frame_count;
@@ -3812,14 +3792,14 @@
int use_alt_ref;
int first_is_key_frame = 0;
if (rc.frames_to_key == 0) {
- rc.frames_to_key = vp9_get_frames_to_next_key(
- oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval);
+ rc.frames_to_key = vp9_get_frames_to_next_key(oxcf, twopass, show_idx,
+ rc.min_gf_interval);
rc.frames_since_key = 0;
first_is_key_frame = 1;
}
gop_coding_frame_count = vp9_get_gop_coding_frame_count(
- oxcf, frame_info, first_pass_info, &rc, show_idx, multi_layer_arf,
+ oxcf, twopass, frame_info, &rc, show_idx, multi_layer_arf,
allow_alt_ref, first_is_key_frame, last_gop_use_alt_ref, &use_alt_ref);
rc.source_alt_ref_active = use_alt_ref;
@@ -3834,9 +3814,8 @@
}
void vp9_get_key_frame_map(const VP9EncoderConfig *oxcf,
- const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
- int *key_frame_map) {
+ const TWO_PASS *const twopass, int *key_frame_map) {
+ const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info;
int show_idx = 0;
RATE_CONTROL rc;
vp9_rc_init(oxcf, 1, &rc);
@@ -3849,8 +3828,8 @@
while (show_idx < first_pass_info->num_frames) {
int key_frame_group_size;
key_frame_map[show_idx] = 1;
- key_frame_group_size = vp9_get_frames_to_next_key(
- oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval);
+ key_frame_group_size =
+ vp9_get_frames_to_next_key(oxcf, twopass, show_idx, rc.min_gf_interval);
assert(key_frame_group_size > 0);
show_idx += key_frame_group_size;
}
--- a/vp9/encoder/vp9_firstpass.h
+++ b/vp9/encoder/vp9_firstpass.h
@@ -21,27 +21,6 @@
extern "C" {
#endif
-#if CONFIG_FP_MB_STATS
-
-#define FPMB_DCINTRA_MASK 0x01
-
-#define FPMB_MOTION_ZERO_MASK 0x02
-#define FPMB_MOTION_LEFT_MASK 0x04
-#define FPMB_MOTION_RIGHT_MASK 0x08
-#define FPMB_MOTION_UP_MASK 0x10
-#define FPMB_MOTION_DOWN_MASK 0x20
-
-#define FPMB_ERROR_SMALL_MASK 0x40
-#define FPMB_ERROR_LARGE_MASK 0x80
-#define FPMB_ERROR_SMALL_TH 2000
-#define FPMB_ERROR_LARGE_TH 48000
-
-typedef struct {
- uint8_t *mb_stats_start;
- uint8_t *mb_stats_end;
-} FIRSTPASS_MB_STATS;
-#endif
-
#define INVALID_ROW (-1)
#define MAX_ARF_LAYERS 6
@@ -188,12 +167,6 @@
double mb_av_energy;
double mb_smooth_pct;
-#if CONFIG_FP_MB_STATS
- uint8_t *frame_mb_stats_buf;
- uint8_t *this_frame_mb_stats;
- FIRSTPASS_MB_STATS firstpass_mb_stats;
-#endif
-
FP_MB_FLOAT_STATS *fp_mb_float_stats;
// An indication of the content type of the current frame
@@ -221,6 +194,24 @@
int last_qindex_of_arf_layer[MAX_ARF_LAYERS];
GF_GROUP gf_group;
+
+ // Vizier project experimental two pass rate control parameters.
+ // When |use_vizier_rc_params| is 1, the following parameters will
+ // be overwritten by passed-in values. Otherwise, they are initialized
+ // to default values.
+ int use_vizier_rc_params;
+ double active_wq_factor;
+ double err_per_mb;
+ double sr_default_decay_limit;
+ double sr_diff_factor;
+ double kf_err_per_mb;
+ double kf_frame_min_boost;
+ double kf_frame_max_boost_first; // Max for first kf in a chunk.
+ double kf_frame_max_boost_subs; // Max for subsequent mid chunk kfs.
+ double kf_max_total_boost;
+ double gf_max_total_boost;
+ double gf_frame_max_boost;
+ double zm_factor;
} TWO_PASS;
struct VP9_COMP;
@@ -239,6 +230,7 @@
void vp9_init_second_pass(struct VP9_COMP *cpi);
void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi);
+void vp9_init_vizier_params(TWO_PASS *const twopass, int screen_area);
// Post encode update of the rate control parameters for 2-pass
void vp9_twopass_postencode_update(struct VP9_COMP *cpi);
@@ -248,9 +240,8 @@
struct VP9EncoderConfig;
int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf,
- const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
- int kf_show_idx, int min_gf_interval);
+ const TWO_PASS *const twopass, int kf_show_idx,
+ int min_gf_interval);
#if CONFIG_RATE_CTRL
/* Call this function to get info about the next group of pictures.
* This function should be called after vp9_create_compressor() when encoding
@@ -265,8 +256,8 @@
/*!\brief Call this function before coding a new group of pictures to get
* information about it.
* \param[in] oxcf Encoder config
+ * \param[in] twopass Twopass info
* \param[in] frame_info Frame info
- * \param[in] first_pass_info First pass stats
* \param[in] rc Rate control state
* \param[in] show_idx Show index of the first frame in the group
* \param[in] multi_layer_arf Is multi-layer alternate reference used
@@ -279,8 +270,8 @@
* \return Returns coding frame count
*/
int vp9_get_gop_coding_frame_count(const struct VP9EncoderConfig *oxcf,
+ const TWO_PASS *const twopass,
const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
const RATE_CONTROL *rc, int show_idx,
int multi_layer_arf, int allow_alt_ref,
int first_is_key_frame,
@@ -287,19 +278,17 @@
int last_gop_use_alt_ref, int *use_alt_ref);
int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf,
- const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
- int multi_layer_arf, int allow_alt_ref);
+ const TWO_PASS *const twopass,
+ const FRAME_INFO *frame_info, int multi_layer_arf,
+ int allow_alt_ref);
/*!\brief Compute a key frame binary map indicates whether key frames appear
* in the corresponding positions. The passed in key_frame_map must point to an
- * integer array with length equal to first_pass_info->num_frames, which is the
- * number of show frames in the video.
+ * integer array with length equal to twopass->first_pass_info.num_frames,
+ * which is the number of show frames in the video.
*/
void vp9_get_key_frame_map(const struct VP9EncoderConfig *oxcf,
- const FRAME_INFO *frame_info,
- const FIRST_PASS_INFO *first_pass_info,
- int *key_frame_map);
+ const TWO_PASS *const twopass, int *key_frame_map);
#endif // CONFIG_RATE_CTRL
FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass);
--- a/vp9/encoder/vp9_lookahead.h
+++ b/vp9/encoder/vp9_lookahead.h
@@ -82,15 +82,11 @@
* This function will copy the source image into a new framebuffer with
* the expected stride/border.
*
- * If active_map is non-NULL and there is only one frame in the queue, then copy
- * only active macroblocks.
- *
* \param[in] ctx Pointer to the lookahead context
* \param[in] src Pointer to the image to enqueue
* \param[in] ts_start Timestamp for the start of this frame
* \param[in] ts_end Timestamp for the end of this frame
* \param[in] flags Flags set on this frame
- * \param[in] active_map Map that specifies which macroblock is active
*/
int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src,
int64_t ts_start, int64_t ts_end, int use_highbitdepth,
--- a/vp9/encoder/vp9_ratectrl.c
+++ b/vp9/encoder/vp9_ratectrl.c
@@ -39,9 +39,6 @@
#define MAX_MB_RATE 250
#define MAXRATE_1080P 4000000
-#define DEFAULT_KF_BOOST 2000
-#define DEFAULT_GF_BOOST 2000
-
#define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1
#define MIN_BPB_FACTOR 0.005
@@ -280,9 +277,9 @@
svc->current_superframe > 0) {
// TODO(marpan): This may need to be modified for temporal layers.
const double framerate_pts = 10000000.0 / ts_delta;
- lrc->bits_off_target += (int)(lc->target_bandwidth / framerate_pts);
+ lrc->bits_off_target += (int)round(lc->target_bandwidth / framerate_pts);
} else {
- lrc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate);
+ lrc->bits_off_target += (int)round(lc->target_bandwidth / lc->framerate);
}
// Clip buffer level to maximum buffer size for the layer.
lrc->bits_off_target =
@@ -410,6 +407,7 @@
rc->source_alt_ref_active = 0;
rc->frames_till_gf_update_due = 0;
+ rc->constrain_gf_key_freq_onepass_vbr = 1;
rc->ni_av_qi = oxcf->worst_allowed_q;
rc->ni_tot_qi = 0;
rc->ni_frames = 0;
@@ -1720,10 +1718,12 @@
}
#if CONFIG_RATE_CTRL
- if (cpi->encode_command.use_external_target_frame_bits) {
- rc->this_frame_target = cpi->encode_command.target_frame_bits;
+ if (cpi->oxcf.use_simple_encode_api) {
+ if (cpi->encode_command.use_external_target_frame_bits) {
+ rc->this_frame_target = cpi->encode_command.target_frame_bits;
+ }
}
-#endif
+#endif // CONFIG_RATE_CTRL
// Target rate per SB64 (including partial SB64s.
rc->sb64_target_rate = (int)(((int64_t)rc->this_frame_target * 64 * 64) /
@@ -2009,7 +2009,7 @@
}
}
-static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+int vp9_calc_pframe_target_size_one_pass_vbr(const VP9_COMP *cpi) {
const RATE_CONTROL *const rc = &cpi->rc;
const int af_ratio = rc->af_ratio_onepass_vbr;
int64_t target =
@@ -2024,7 +2024,7 @@
return vp9_rc_clamp_pframe_target_size(cpi, (int)target);
}
-static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) {
+int vp9_calc_iframe_target_size_one_pass_vbr(const VP9_COMP *cpi) {
static const int kf_ratio = 25;
const RATE_CONTROL *rc = &cpi->rc;
const int target = rc->avg_frame_bandwidth * kf_ratio;
@@ -2050,22 +2050,9 @@
}
}
-void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
- VP9_COMMON *const cm = &cpi->common;
+void vp9_set_gf_update_one_pass_vbr(VP9_COMP *const cpi) {
RATE_CONTROL *const rc = &cpi->rc;
- int target;
- if (!cpi->refresh_alt_ref_frame &&
- (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
- rc->frames_to_key == 0)) {
- cm->frame_type = KEY_FRAME;
- rc->this_key_frame_forced =
- cm->current_video_frame != 0 && rc->frames_to_key == 0;
- rc->frames_to_key = cpi->oxcf.key_freq;
- rc->kf_boost = DEFAULT_KF_BOOST;
- rc->source_alt_ref_active = 0;
- } else {
- cm->frame_type = INTER_FRAME;
- }
+ VP9_COMMON *const cm = &cpi->common;
if (rc->frames_till_gf_update_due == 0) {
double rate_err = 1.0;
rc->gfu_boost = DEFAULT_GF_BOOST;
@@ -2084,18 +2071,23 @@
rate_err > 3.5) {
rc->baseline_gf_interval =
VPXMIN(15, (3 * rc->baseline_gf_interval) >> 1);
- } else if (rc->avg_frame_low_motion < 20) {
+ } else if (rc->avg_frame_low_motion > 0 &&
+ rc->avg_frame_low_motion < 20) {
// Decrease gf interval for high motion case.
rc->baseline_gf_interval = VPXMAX(6, rc->baseline_gf_interval >> 1);
}
- // Adjust boost and af_ratio based on avg_frame_low_motion, which varies
- // between 0 and 100 (stationary, 100% zero/small motion).
- rc->gfu_boost =
- VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
- (rc->avg_frame_low_motion + 100));
+ // Adjust boost and af_ratio based on avg_frame_low_motion, which
+ // varies between 0 and 100 (stationary, 100% zero/small motion).
+ if (rc->avg_frame_low_motion > 0)
+ rc->gfu_boost =
+ VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) /
+ (rc->avg_frame_low_motion + 100));
+ else if (rc->avg_frame_low_motion == 0 && rate_err > 1.0)
+ rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400));
}
- adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
+ if (rc->constrain_gf_key_freq_onepass_vbr)
+ adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
cpi->refresh_golden_frame = 1;
rc->source_alt_ref_pending = 0;
@@ -2105,10 +2097,29 @@
rc->alt_ref_gf_group = 1;
}
}
+}
+
+void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) {
+ VP9_COMMON *const cm = &cpi->common;
+ RATE_CONTROL *const rc = &cpi->rc;
+ int target;
+ if (!cpi->refresh_alt_ref_frame &&
+ (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) ||
+ rc->frames_to_key == 0)) {
+ cm->frame_type = KEY_FRAME;
+ rc->this_key_frame_forced =
+ cm->current_video_frame != 0 && rc->frames_to_key == 0;
+ rc->frames_to_key = cpi->oxcf.key_freq;
+ rc->kf_boost = DEFAULT_KF_BOOST;
+ rc->source_alt_ref_active = 0;
+ } else {
+ cm->frame_type = INTER_FRAME;
+ }
+ vp9_set_gf_update_one_pass_vbr(cpi);
if (cm->frame_type == KEY_FRAME)
- target = calc_iframe_target_size_one_pass_vbr(cpi);
+ target = vp9_calc_iframe_target_size_one_pass_vbr(cpi);
else
- target = calc_pframe_target_size_one_pass_vbr(cpi);
+ target = vp9_calc_pframe_target_size_one_pass_vbr(cpi);
vp9_rc_set_frame_target(cpi, target);
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0)
vp9_cyclic_refresh_update_parameters(cpi);
@@ -2526,26 +2537,25 @@
rc->min_gf_interval = FIXED_GF_INTERVAL;
rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
} else {
+ double framerate = cpi->framerate;
// Set Maximum gf/arf interval
rc->max_gf_interval = oxcf->max_gf_interval;
rc->min_gf_interval = oxcf->min_gf_interval;
#if CONFIG_RATE_CTRL
+ if (oxcf->use_simple_encode_api) {
+ // In this experiment, we avoid framerate being changed dynamically during
+ // encoding.
+ framerate = oxcf->init_framerate;
+ }
+#endif // CONFIG_RATE_CTRL
if (rc->min_gf_interval == 0) {
rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
- oxcf->width, oxcf->height, oxcf->init_framerate);
+ oxcf->width, oxcf->height, framerate);
}
if (rc->max_gf_interval == 0) {
- rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
- oxcf->init_framerate, rc->min_gf_interval);
+ rc->max_gf_interval =
+ vp9_rc_get_default_max_gf_interval(framerate, rc->min_gf_interval);
}
-#else
- if (rc->min_gf_interval == 0)
- rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
- oxcf->width, oxcf->height, cpi->framerate);
- if (rc->max_gf_interval == 0)
- rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
- cpi->framerate, rc->min_gf_interval);
-#endif
// Extended max interval for genuinely static scenes like slide shows.
rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;
@@ -2953,7 +2963,7 @@
}
}
}
- target = calc_pframe_target_size_one_pass_vbr(cpi);
+ target = vp9_calc_pframe_target_size_one_pass_vbr(cpi);
vp9_rc_set_frame_target(cpi, target);
}
rc->prev_avg_source_sad_lag = avg_source_sad_lag;
@@ -3163,7 +3173,7 @@
VPXMIN(20, VPXMAX(10, rc->baseline_gf_interval));
adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
rc->frames_till_gf_update_due = rc->baseline_gf_interval;
- target = calc_pframe_target_size_one_pass_vbr(cpi);
+ target = vp9_calc_pframe_target_size_one_pass_vbr(cpi);
vp9_rc_set_frame_target(cpi, target);
rc->count_last_scene_change = 0;
} else {
--- a/vp9/encoder/vp9_ratectrl.h
+++ b/vp9/encoder/vp9_ratectrl.h
@@ -27,6 +27,9 @@
// Bits Per MB at different Q (Multiplied by 512)
#define BPER_MB_NORMBITS 9
+#define DEFAULT_KF_BOOST 2000
+#define DEFAULT_GF_BOOST 2000
+
#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
#define FIXED_GF_INTERVAL 8 // Used in some testing modes only
@@ -204,6 +207,10 @@
int preserve_arf_as_gld;
int preserve_next_arf_as_gld;
int show_arf_as_gld;
+
+ // Flag to constrain golden frame interval on key frame frequency for 1 pass
+ // VBR.
+ int constrain_gf_key_freq_onepass_vbr;
} RATE_CONTROL;
struct VP9_COMP;
@@ -255,6 +262,9 @@
void vp9_rc_get_one_pass_cbr_params(struct VP9_COMP *cpi);
int vp9_calc_pframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi);
int vp9_calc_iframe_target_size_one_pass_cbr(const struct VP9_COMP *cpi);
+int vp9_calc_pframe_target_size_one_pass_vbr(const struct VP9_COMP *cpi);
+int vp9_calc_iframe_target_size_one_pass_vbr(const struct VP9_COMP *cpi);
+void vp9_set_gf_update_one_pass_vbr(struct VP9_COMP *const cpi);
void vp9_update_buffer_level_preencode(struct VP9_COMP *cpi);
void vp9_rc_get_svc_params(struct VP9_COMP *cpi);
--- a/vp9/encoder/vp9_rd.c
+++ b/vp9/encoder/vp9_rd.c
@@ -197,28 +197,68 @@
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
128, 144, 144 };
+// Configure Vizier RD parameters.
+// Later this function will use passed in command line values.
+void vp9_init_rd_parameters(VP9_COMP *cpi) {
+ RD_CONTROL *const rdc = &cpi->rd_ctrl;
+
+ // When |use_vizier_rc_params| is 1, we expect the rd parameters have been
+ // initialized by the passed-in values.
+ // Be careful that parameters below are only initialized to 1, if we do not
+ // pass values to them. It is desired to take care of each parameter when
+ // using |use_vizier_rc_params|.
+ if (cpi->twopass.use_vizier_rc_params) return;
+
+ // Make sure this function is floating point safe.
+ vpx_clear_system_state();
+
+ rdc->rd_mult_inter_qp_fac = 1.0;
+ rdc->rd_mult_arf_qp_fac = 1.0;
+ rdc->rd_mult_key_qp_fac = 1.0;
+}
+
+// Returns the default rd multiplier for inter frames for a given qindex.
+// The function here is a first pass estimate based on data from
+ // a previous Vizier run
+static double def_inter_rd_multiplier(int qindex) {
+ return 4.15 + (0.001 * (double)qindex);
+}
+
+// Returns the default rd multiplier for ARF/Golden Frames for a given qindex.
+// The function here is a first pass estimate based on data from
+ // a previous Vizier run
+static double def_arf_rd_multiplier(int qindex) {
+ return 4.25 + (0.001 * (double)qindex);
+}
+
+// Returns the default rd multiplier for key frames for a given qindex.
+// The function here is a first pass estimate based on data from
+ // a previous Vizier run
+static double def_kf_rd_multiplier(int qindex) {
+ return 4.35 + (0.001 * (double)qindex);
+}
+
int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
- // largest dc_quant is 21387, therefore rdmult should always fit in int32_t
+ const RD_CONTROL *rdc = &cpi->rd_ctrl;
const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
- uint32_t rdmult = q * q;
+ // largest dc_quant is 21387, therefore rdmult should fit in int32_t
+ int rdmult = q * q;
- if (cpi->common.frame_type != KEY_FRAME) {
- if (qindex < 128)
- rdmult = rdmult * 4;
- else if (qindex < 190)
- rdmult = rdmult * 4 + rdmult / 2;
- else
- rdmult = rdmult * 3;
+ // Make sure this function is floating point safe.
+ vpx_clear_system_state();
+
+ if (cpi->common.frame_type == KEY_FRAME) {
+ double def_rd_q_mult = def_kf_rd_multiplier(qindex);
+ rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_key_qp_fac);
+ } else if (!cpi->rc.is_src_frame_alt_ref &&
+ (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) {
+ double def_rd_q_mult = def_arf_rd_multiplier(qindex);
+ rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_arf_qp_fac);
} else {
- if (qindex < 64)
- rdmult = rdmult * 4;
- else if (qindex <= 128)
- rdmult = rdmult * 3 + rdmult / 2;
- else if (qindex < 190)
- rdmult = rdmult * 4 + rdmult / 2;
- else
- rdmult = rdmult * 7 + rdmult / 2;
+ double def_rd_q_mult = def_inter_rd_multiplier(qindex);
+ rdmult = (int)((double)rdmult * def_rd_q_mult * rdc->rd_mult_inter_qp_fac);
}
+
#if CONFIG_VP9_HIGHBITDEPTH
switch (cpi->common.bit_depth) {
case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
--- a/vp9/encoder/vp9_rd.h
+++ b/vp9/encoder/vp9_rd.h
@@ -101,6 +101,13 @@
THR_INTRA,
} THR_MODES_SUB8X8;
+typedef struct {
+ // RD multiplier control factors added for Vizier project.
+ double rd_mult_inter_qp_fac;
+ double rd_mult_arf_qp_fac;
+ double rd_mult_key_qp_fac;
+} RD_CONTROL;
+
typedef struct RD_OPT {
// Thresh_mult is used to set a threshold for the rd score. A higher value
// means that we will accept the best mode so far more often. This number
@@ -143,6 +150,8 @@
struct TileDataEnc;
struct VP9_COMP;
struct macroblock;
+
+void vp9_init_rd_parameters(struct VP9_COMP *cpi);
int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi, int qindex);
--- a/vp9/encoder/vp9_rdopt.c
+++ b/vp9/encoder/vp9_rdopt.c
@@ -745,8 +745,8 @@
MODE_INFO *const mi = xd->mi[0];
int64_t rd1, rd2, rd;
int rate;
- int64_t dist;
- int64_t sse;
+ int64_t dist = INT64_MAX;
+ int64_t sse = INT64_MAX;
const int coeff_ctx =
combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]);
struct buf_2d *recon = args->this_recon;
@@ -799,6 +799,13 @@
if (max_txsize_lookup[plane_bsize] == tx_size)
skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))];
+ // This reduces the risk of bad perceptual quality due to bad prediction.
+ // We always force the encoder to perform transform and quantization.
+ if (!args->cpi->sf.allow_skip_txfm_ac_dc &&
+ skip_txfm_flag == SKIP_TXFM_AC_DC) {
+ skip_txfm_flag = SKIP_TXFM_NONE;
+ }
+
if (skip_txfm_flag == SKIP_TXFM_NONE ||
(recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) {
// full forward transform and quantization
@@ -827,17 +834,7 @@
dist = VPXMAX(0, sse - dc_correct);
}
} else {
- // SKIP_TXFM_AC_DC
- // skip forward transform. Because this is handled here, the quantization
- // does not need to do it.
- x->plane[plane].eobs[block] = 0;
- sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4;
- dist = sse;
- if (recon) {
- uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)];
- copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride,
- blk_row, blk_col, plane_bsize, tx_bsize);
- }
+ assert(0 && "allow_skip_txfm_ac_dc does not allow SKIP_TXFM_AC_DC.");
}
}
--- a/vp9/encoder/vp9_speed_features.c
+++ b/vp9/encoder/vp9_speed_features.c
@@ -345,7 +345,6 @@
sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC;
sf->adaptive_interp_filter_search = 1;
- sf->allow_partition_search_skip = 1;
if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
for (i = 0; i < MAX_MESH_STEP; ++i) {
@@ -931,7 +930,6 @@
sf->max_delta_qindex = 0;
sf->disable_filter_search_var_thresh = 0;
sf->adaptive_interp_filter_search = 0;
- sf->allow_partition_search_skip = 0;
sf->allow_txfm_domain_distortion = 0;
sf->tx_domain_thresh = 99.0;
sf->allow_quant_coeff_opt = sf->optimize_coefficients;
@@ -940,6 +938,7 @@
sf->enable_tpl_model = oxcf->enable_tpl_model;
sf->prune_ref_frame_for_rect_partitions = 0;
sf->temporal_filter_search_method = MESH;
+ sf->allow_skip_txfm_ac_dc = 0;
for (i = 0; i < TX_SIZES; i++) {
sf->intra_y_mode_mask[i] = INTRA_ALL;
--- a/vp9/encoder/vp9_speed_features.h
+++ b/vp9/encoder/vp9_speed_features.h
@@ -525,9 +525,6 @@
int prune_rect_thresh[4];
} rd_ml_partition;
- // Allow skipping partition search for still image frame
- int allow_partition_search_skip;
-
// Fast approximation of vp9_model_rd_from_var_lapndz
int simple_model_rd_from_var;
@@ -612,6 +609,12 @@
// For real-time mode: force DC only under intra search when content
// does not have high souce SAD.
int rt_intra_dc_only_low_content;
+
+ // The encoder has a feature that skips forward transform and quantization
+ // based on a model rd estimation to reduce encoding time.
+ // However, this feature is dangerous since it could lead to bad perceptual
+ // quality. This flag is added to guard the feature.
+ int allow_skip_txfm_ac_dc;
} SPEED_FEATURES;
struct VP9_COMP;
--- a/vp9/encoder/vp9_svc_layercontext.c
+++ b/vp9/encoder/vp9_svc_layercontext.c
@@ -322,8 +322,8 @@
const int prev_layer_target_bandwidth =
oxcf->layer_target_bitrate[st_idx - 1];
lc->avg_frame_size =
- (int)((lc->target_bandwidth - prev_layer_target_bandwidth) /
- (lc->framerate - prev_layer_framerate));
+ (int)round((lc->target_bandwidth - prev_layer_target_bandwidth) /
+ (lc->framerate - prev_layer_framerate));
}
}
@@ -956,7 +956,7 @@
if (cpi->common.frame_type != KEY_FRAME && !cpi->ext_refresh_last_frame &&
!cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame)
svc->non_reference_frame = 1;
- // For non-flexible mode, where update_buffer_slot is used, need to check if
+ // For flexible mode, where update_buffer_slot is used, need to check if
// all buffer slots are not refreshed.
if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
if (svc->update_buffer_slot[svc->spatial_layer_id] != 0)
--- a/vp9/encoder/vp9_svc_layercontext.h
+++ b/vp9/encoder/vp9_svc_layercontext.h
@@ -173,6 +173,8 @@
uint8_t fb_idx_temporal_layer_id[REF_FRAMES];
int spatial_layer_sync[VPX_SS_MAX_LAYERS];
+ // Quantizer for each spatial layer.
+ int base_qindex[VPX_SS_MAX_LAYERS];
uint8_t set_intra_only_frame;
uint8_t previous_frame_is_intra_only;
uint8_t superframe_has_layer_sync;
--- a/vp9/ratectrl_rtc.cc
+++ b/vp9/ratectrl_rtc.cc
@@ -11,6 +11,7 @@
#include <new>
+#include "vp9/common/vp9_common.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_picklpf.h"
#include "vpx/vp8cx.h"
@@ -24,10 +25,19 @@
VP9RateControlRTC());
if (!rc_api) return nullptr;
rc_api->cpi_ = static_cast<VP9_COMP *>(vpx_memalign(32, sizeof(*cpi_)));
- if (rc_api->cpi_ == nullptr) {
- return nullptr;
- }
+ if (!rc_api->cpi_) return nullptr;
+ vp9_zero(*rc_api->cpi_);
+
rc_api->InitRateControl(cfg);
+ if (cfg.aq_mode) {
+ VP9_COMP *const cpi = rc_api->cpi_;
+ cpi->segmentation_map = static_cast<uint8_t *>(
+ vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols,
+ sizeof(*cpi->segmentation_map)));
+ cpi->cyclic_refresh =
+ vp9_cyclic_refresh_alloc(cpi->common.mi_rows, cpi->common.mi_cols);
+ cpi->cyclic_refresh->content_mode = 0;
+ }
return rc_api;
}
@@ -38,13 +48,18 @@
cm->profile = PROFILE_0;
cm->bit_depth = VPX_BITS_8;
cm->show_frame = 1;
- oxcf->rc_mode = VPX_CBR;
+ oxcf->profile = cm->profile;
+ oxcf->bit_depth = cm->bit_depth;
+ oxcf->rc_mode = rc_cfg.rc_mode;
oxcf->pass = 0;
- oxcf->aq_mode = NO_AQ;
+ oxcf->aq_mode = rc_cfg.aq_mode ? CYCLIC_REFRESH_AQ : NO_AQ;
oxcf->content = VP9E_CONTENT_DEFAULT;
oxcf->drop_frames_water_mark = 0;
+ cm->current_video_frame = 0;
+ rc->kf_boost = DEFAULT_KF_BOOST;
UpdateRateControl(rc_cfg);
+ vp9_set_mb_mi(cm, cm->width, cm->height);
cpi_->use_svc = (cpi_->svc.number_spatial_layers > 1 ||
cpi_->svc.number_temporal_layers > 1)
@@ -55,8 +70,8 @@
rc->rc_2_frame = 0;
vp9_rc_init_minq_luts();
vp9_rc_init(oxcf, 0, rc);
+ rc->constrain_gf_key_freq_onepass_vbr = 0;
cpi_->sf.use_nonrd_pick_mode = 1;
- cm->current_video_frame = 0;
}
void VP9RateControlRTC::UpdateRateControl(
@@ -73,6 +88,7 @@
oxcf->best_allowed_q = vp9_quantizer_to_qindex(rc_cfg.min_quantizer);
rc->worst_quality = oxcf->worst_allowed_q;
rc->best_quality = oxcf->best_allowed_q;
+ oxcf->init_framerate = rc_cfg.framerate;
oxcf->target_bandwidth = 1000 * rc_cfg.target_bandwidth;
oxcf->starting_buffer_level_ms = rc_cfg.buf_initial_sz;
oxcf->optimal_buffer_level_ms = rc_cfg.buf_optimal_sz;
@@ -85,10 +101,11 @@
(rc_cfg.ts_number_layers > 1) ? rc_cfg.ts_number_layers : 0);
cpi_->oxcf.rc_max_intra_bitrate_pct = rc_cfg.max_intra_bitrate_pct;
+ cpi_->oxcf.rc_max_inter_bitrate_pct = rc_cfg.max_inter_bitrate_pct;
cpi_->framerate = rc_cfg.framerate;
cpi_->svc.number_spatial_layers = rc_cfg.ss_number_layers;
cpi_->svc.number_temporal_layers = rc_cfg.ts_number_layers;
-
+ vp9_set_mb_mi(cm, cm->width, cm->height);
for (int sl = 0; sl < cpi_->svc.number_spatial_layers; ++sl) {
for (int tl = 0; tl < cpi_->svc.number_temporal_layers; ++tl) {
const int layer =
@@ -138,11 +155,27 @@
cpi_->sf.use_nonrd_pick_mode = 1;
if (cpi_->svc.number_spatial_layers == 1 &&
cpi_->svc.number_temporal_layers == 1) {
- int target;
- if (frame_is_intra_only(cm))
- target = vp9_calc_iframe_target_size_one_pass_cbr(cpi_);
- else
- target = vp9_calc_pframe_target_size_one_pass_cbr(cpi_);
+ int target = 0;
+ if (cpi_->oxcf.rc_mode == VPX_CBR) {
+ if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_update_parameters(cpi_);
+ if (frame_is_intra_only(cm))
+ target = vp9_calc_iframe_target_size_one_pass_cbr(cpi_);
+ else
+ target = vp9_calc_pframe_target_size_one_pass_cbr(cpi_);
+ } else if (cpi_->oxcf.rc_mode == VPX_VBR) {
+ if (cm->frame_type == KEY_FRAME) {
+ cpi_->rc.this_key_frame_forced = cm->current_video_frame != 0;
+ cpi_->rc.frames_to_key = cpi_->oxcf.key_freq;
+ }
+ vp9_set_gf_update_one_pass_vbr(cpi_);
+ if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
+ vp9_cyclic_refresh_update_parameters(cpi_);
+ if (frame_is_intra_only(cm))
+ target = vp9_calc_iframe_target_size_one_pass_vbr(cpi_);
+ else
+ target = vp9_calc_pframe_target_size_one_pass_vbr(cpi_);
+ }
vp9_rc_set_frame_target(cpi_, target);
vp9_update_buffer_level_preencode(cpi_);
} else {
@@ -153,6 +186,8 @@
int bottom_index, top_index;
cpi_->common.base_qindex =
vp9_rc_pick_q_and_bounds(cpi_, &bottom_index, &top_index);
+
+ if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_setup(cpi_);
}
int VP9RateControlRTC::GetQP() const { return cpi_->common.base_qindex; }
@@ -161,6 +196,14 @@
struct loopfilter *const lf = &cpi_->common.lf;
vp9_pick_filter_level(nullptr, cpi_, LPF_PICK_FROM_Q);
return lf->filter_level;
+}
+
+signed char *VP9RateControlRTC::GetCyclicRefreshMap() const {
+ return cpi_->cyclic_refresh->map;
+}
+
+int *VP9RateControlRTC::GetDeltaQ() const {
+ return cpi_->cyclic_refresh->qindex_delta;
}
void VP9RateControlRTC::PostEncodeUpdate(uint64_t encoded_frame_size) {
--- a/vp9/ratectrl_rtc.h
+++ b/vp9/ratectrl_rtc.h
@@ -18,27 +18,30 @@
#include "vp9/common/vp9_enums.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/vp9_iface_common.h"
+#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_firstpass.h"
#include "vp9/vp9_cx_iface.h"
+#include "vpx/internal/vpx_ratectrl_rtc.h"
#include "vpx_mem/vpx_mem.h"
namespace libvpx {
-struct VP9RateControlRtcConfig {
- int width;
- int height;
- // 0-63
- int max_quantizer;
- int min_quantizer;
- int64_t target_bandwidth;
- int64_t buf_initial_sz;
- int64_t buf_optimal_sz;
- int64_t buf_sz;
- int undershoot_pct;
- int overshoot_pct;
- int max_intra_bitrate_pct;
- double framerate;
+struct VP9RateControlRtcConfig : public VpxRateControlRtcConfig {
+ public:
+ VP9RateControlRtcConfig() {
+ vp9_zero(max_quantizers);
+ vp9_zero(min_quantizers);
+ vp9_zero(scaling_factor_den);
+ vp9_zero(scaling_factor_num);
+ vp9_zero(layer_target_bitrate);
+ vp9_zero(ts_rate_decimator);
+ scaling_factor_num[0] = 1;
+ scaling_factor_den[0] = 1;
+ max_quantizers[0] = max_quantizer;
+ min_quantizers[0] = min_quantizer;
+ }
+
// Number of spatial layers
int ss_number_layers;
// Number of temporal layers
@@ -47,8 +50,6 @@
int min_quantizers[VPX_MAX_LAYERS];
int scaling_factor_num[VPX_SS_MAX_LAYERS];
int scaling_factor_den[VPX_SS_MAX_LAYERS];
- int layer_target_bitrate[VPX_MAX_LAYERS];
- int ts_rate_decimator[VPX_TS_MAX_LAYERS];
};
struct VP9FrameParamsQpRTC {
@@ -58,7 +59,7 @@
};
// This interface allows using VP9 real-time rate control without initializing
-// the encoder. To use this interface, you need to link with libvp9rc.a.
+// the encoder. To use this interface, you need to link with libvpxrc.a.
//
// #include "vp9/ratectrl_rtc.h"
// VP9RateControlRTC rc_api;
@@ -84,15 +85,23 @@
const VP9RateControlRtcConfig &cfg);
~VP9RateControlRTC() {
if (cpi_) {
- for (int sl = 0; sl < cpi_->svc.number_spatial_layers; sl++) {
- for (int tl = 0; tl < cpi_->svc.number_temporal_layers; tl++) {
- int layer = LAYER_IDS_TO_IDX(sl, tl, cpi_->oxcf.ts_number_layers);
- LAYER_CONTEXT *const lc = &cpi_->svc.layer_context[layer];
- vpx_free(lc->map);
- vpx_free(lc->last_coded_q_map);
- vpx_free(lc->consec_zero_mv);
+ if (cpi_->svc.number_spatial_layers > 1 ||
+ cpi_->svc.number_temporal_layers > 1) {
+ for (int sl = 0; sl < cpi_->svc.number_spatial_layers; sl++) {
+ for (int tl = 0; tl < cpi_->svc.number_temporal_layers; tl++) {
+ int layer = LAYER_IDS_TO_IDX(sl, tl, cpi_->oxcf.ts_number_layers);
+ LAYER_CONTEXT *const lc = &cpi_->svc.layer_context[layer];
+ vpx_free(lc->map);
+ vpx_free(lc->last_coded_q_map);
+ vpx_free(lc->consec_zero_mv);
+ }
}
}
+ if (cpi_->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
+ vpx_free(cpi_->segmentation_map);
+ cpi_->segmentation_map = NULL;
+ vp9_cyclic_refresh_free(cpi_->cyclic_refresh);
+ }
vpx_free(cpi_);
}
}
@@ -101,6 +110,8 @@
// GetQP() needs to be called after ComputeQP() to get the latest QP
int GetQP() const;
int GetLoopfilterLevel() const;
+ signed char *GetCyclicRefreshMap() const;
+ int *GetDeltaQ() const;
void ComputeQP(const VP9FrameParamsQpRTC &frame_params);
// Feedback to rate control with the size of current encoded frame
void PostEncodeUpdate(uint64_t encoded_frame_size);
--- a/vp9/simple_encode.cc
+++ b/vp9/simple_encode.cc
@@ -793,6 +793,7 @@
if (enc_pass == VPX_RC_FIRST_PASS) {
oxcf.lag_in_frames = 0;
}
+ oxcf.use_simple_encode_api = 1;
return oxcf;
}
@@ -872,14 +873,14 @@
const VP9EncoderConfig oxcf = GetEncodeConfig(
frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
VPX_RC_FIRST_PASS, impl_ptr_->encode_config_list);
- VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
- struct lookahead_ctx *lookahead = cpi->lookahead;
+ impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt);
+ struct lookahead_ctx *lookahead = impl_ptr_->cpi->lookahead;
int i;
int use_highbitdepth = 0;
const int num_rows_16x16 = get_num_unit_16x16(frame_height_);
const int num_cols_16x16 = get_num_unit_16x16(frame_width_);
#if CONFIG_VP9_HIGHBITDEPTH
- use_highbitdepth = cpi->common.use_highbitdepth;
+ use_highbitdepth = impl_ptr_->cpi->common.use_highbitdepth;
#endif
vpx_image_t img;
vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1);
@@ -905,30 +906,35 @@
ENCODE_FRAME_RESULT encode_frame_info;
vp9_init_encode_frame_result(&encode_frame_info);
// TODO(angiebird): Call vp9_first_pass directly
- vp9_get_compressed_data(cpi, &frame_flags, &size, nullptr, &time_stamp,
- &time_end, flush, &encode_frame_info);
+ vp9_get_compressed_data(impl_ptr_->cpi, &frame_flags, &size, nullptr,
+ &time_stamp, &time_end, flush,
+ &encode_frame_info);
// vp9_get_compressed_data only generates first pass stats not
// compresses data
assert(size == 0);
// Get vp9 first pass motion vector info.
std::vector<MotionVectorInfo> mv_info(num_rows_16x16 * num_cols_16x16);
- update_motion_vector_info(cpi->fp_motion_vector_info, num_rows_16x16,
- num_cols_16x16, mv_info.data(),
- kMotionVectorFullPixelPrecision);
+ update_motion_vector_info(
+ impl_ptr_->cpi->fp_motion_vector_info, num_rows_16x16,
+ num_cols_16x16, mv_info.data(), kMotionVectorFullPixelPrecision);
fp_motion_vector_info_.push_back(mv_info);
}
- impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass));
+ impl_ptr_->first_pass_stats.push_back(
+ vp9_get_frame_stats(&impl_ptr_->cpi->twopass));
}
}
- vp9_end_first_pass(cpi);
// TODO(angiebird): Store the total_stats apart form first_pass_stats
- impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass));
- free_encoder(cpi);
- rewind(in_file_);
- vpx_img_free(&img);
+ impl_ptr_->first_pass_stats.push_back(
+ vp9_get_total_stats(&impl_ptr_->cpi->twopass));
+ vp9_end_first_pass(impl_ptr_->cpi);
// Generate key_frame_map based on impl_ptr_->first_pass_stats.
key_frame_map_ = ComputeKeyFrameMap();
+
+ free_encoder(impl_ptr_->cpi);
+ impl_ptr_->cpi = nullptr;
+ rewind(in_file_);
+ vpx_img_free(&img);
}
std::vector<std::vector<double>> SimpleEncode::ObserveFirstPassStats() {
@@ -1004,8 +1010,7 @@
static GOP_COMMAND GetGopCommand(const std::vector<int> &gop_map,
int start_show_index) {
GOP_COMMAND gop_command;
- if (gop_map.size() > 0) {
- assert(static_cast<size_t>(start_show_index) < gop_map.size());
+ if (static_cast<size_t>(start_show_index) < gop_map.size()) {
assert((gop_map[start_show_index] & kGopMapFlagStart) != 0);
int end_show_index = start_show_index + 1;
// gop_map[end_show_index] & kGopMapFlagStart == 0 means this is
@@ -1049,6 +1054,11 @@
frame_coding_index_ = 0;
show_frame_count_ = 0;
+ assert(impl_ptr_->cpi != nullptr);
+ FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
+ unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
+ vp9_init_vizier_params(&impl_ptr_->cpi->twopass, screen_area);
+
UpdateKeyFrameGroup(show_frame_count_);
const GOP_COMMAND gop_command = GetGopCommand(gop_map_, show_frame_count_);
@@ -1084,8 +1094,7 @@
const VP9_COMP *cpi = impl_ptr_->cpi;
key_frame_group_index_ = 0;
key_frame_group_size_ = vp9_get_frames_to_next_key(
- &cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info,
- key_frame_show_index, cpi->rc.min_gf_interval);
+ &cpi->oxcf, &cpi->twopass, key_frame_show_index, cpi->rc.min_gf_interval);
assert(key_frame_group_size_ > 0);
// Init the reference frame info when a new key frame group appears.
InitRefFrameInfo(&ref_frame_info_);
@@ -1239,7 +1248,7 @@
start_show_index += gop_command.show_frame_count;
coding_frame_count += gop_command_coding_frame_count(&gop_command);
}
- assert(start_show_index == gop_map.size());
+ assert(static_cast<size_t>(start_show_index) == gop_map.size());
return coding_frame_count;
}
@@ -1250,6 +1259,7 @@
}
// These are the default settings for now.
+ TWO_PASS twopass;
const int multi_layer_arf = 0;
const int allow_alt_ref = 1;
vpx_rational_t frame_rate =
@@ -1258,30 +1268,30 @@
frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
- FIRST_PASS_INFO first_pass_info;
- fps_init_first_pass_info(&first_pass_info,
+ fps_init_first_pass_info(&twopass.first_pass_info,
GetVectorData(impl_ptr_->first_pass_stats),
num_frames_);
- return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info,
- multi_layer_arf, allow_alt_ref);
+ unsigned int screen_area = frame_info.frame_width * frame_info.frame_height;
+ vp9_init_vizier_params(&twopass, screen_area);
+ return vp9_get_coding_frame_num(&oxcf, &twopass, &frame_info, multi_layer_arf,
+ allow_alt_ref);
}
std::vector<int> SimpleEncode::ComputeKeyFrameMap() const {
// The last entry of first_pass_stats is the overall stats.
- assert(impl_ptr_->first_pass_stats.size() == num_frames_ + 1);
+ assert(impl_ptr_->first_pass_stats.size() ==
+ static_cast<size_t>(num_frames_) + 1);
vpx_rational_t frame_rate =
make_vpx_rational(frame_rate_num_, frame_rate_den_);
const VP9EncoderConfig oxcf = GetEncodeConfig(
frame_width_, frame_height_, frame_rate, target_bitrate_, encode_speed_,
VPX_RC_LAST_PASS, impl_ptr_->encode_config_list);
- FRAME_INFO frame_info = vp9_get_frame_info(&oxcf);
- FIRST_PASS_INFO first_pass_info;
- fps_init_first_pass_info(&first_pass_info,
+ TWO_PASS twopass;
+ fps_init_first_pass_info(&twopass.first_pass_info,
GetVectorData(impl_ptr_->first_pass_stats),
num_frames_);
std::vector<int> key_frame_map(num_frames_, 0);
- vp9_get_key_frame_map(&oxcf, &frame_info, &first_pass_info,
- GetVectorData(key_frame_map));
+ vp9_get_key_frame_map(&oxcf, &twopass, GetVectorData(key_frame_map));
return key_frame_map;
}
--- a/vp9/vp9_cx_iface.c
+++ b/vp9/vp9_cx_iface.c
@@ -348,6 +348,24 @@
}
RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB);
RANGE_CHECK(extra_cfg, color_range, VPX_CR_STUDIO_RANGE, VPX_CR_FULL_RANGE);
+
+ // The ranges below are provisional and should be tuned further.
+ RANGE_CHECK(cfg, use_vizier_rc_params, 0, 1);
+ RANGE_CHECK(cfg, active_wq_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, err_per_mb_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, sr_default_decay_limit.den, 1, 1000);
+ RANGE_CHECK(cfg, sr_diff_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_err_per_mb_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_frame_min_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_frame_max_boost_subs_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, kf_max_total_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, gf_max_total_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, gf_frame_max_boost_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, zm_factor.den, 1, 1000);
+ RANGE_CHECK(cfg, rd_mult_inter_qp_fac.den, 1, 1000);
+ RANGE_CHECK(cfg, rd_mult_arf_qp_fac.den, 1, 1000);
+ RANGE_CHECK(cfg, rd_mult_key_qp_fac.den, 1, 1000);
+
return VPX_CODEC_OK;
}
@@ -565,10 +583,6 @@
vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in);
-#if CONFIG_FP_MB_STATS
- oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in;
-#endif
-
oxcf->color_space = extra_cfg->color_space;
oxcf->color_range = extra_cfg->color_range;
oxcf->render_width = extra_cfg->render_width;
@@ -634,10 +648,135 @@
}
if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf);
+ oxcf->use_simple_encode_api = 0;
// vp9_dump_encoder_config(oxcf, stderr);
return VPX_CODEC_OK;
}
+static vpx_codec_err_t set_twopass_params_from_config(
+ const vpx_codec_enc_cfg_t *const cfg, struct VP9_COMP *cpi) {
+ if (!cfg->use_vizier_rc_params) return VPX_CODEC_OK;
+ if (cpi == NULL) return VPX_CODEC_ERROR;
+
+ cpi->twopass.use_vizier_rc_params = cfg->use_vizier_rc_params;
+
+ // The values set here are factors that will be applied to default values
+ // to get the final value used in the two pass code. Hence 1.0 will
+ // match the default behaviour when not using passed in values.
+ // We also apply limits here to prevent the user from applying settings
+ // that make no sense.
+ cpi->twopass.active_wq_factor =
+ (double)cfg->active_wq_factor.num / (double)cfg->active_wq_factor.den;
+ if (cpi->twopass.active_wq_factor < 0.25)
+ cpi->twopass.active_wq_factor = 0.25;
+ else if (cpi->twopass.active_wq_factor > 16.0)
+ cpi->twopass.active_wq_factor = 16.0;
+
+ cpi->twopass.err_per_mb =
+ (double)cfg->err_per_mb_factor.num / (double)cfg->err_per_mb_factor.den;
+ if (cpi->twopass.err_per_mb < 0.25)
+ cpi->twopass.err_per_mb = 0.25;
+ else if (cpi->twopass.err_per_mb > 4.0)
+ cpi->twopass.err_per_mb = 4.0;
+
+ cpi->twopass.sr_default_decay_limit =
+ (double)cfg->sr_default_decay_limit.num /
+ (double)cfg->sr_default_decay_limit.den;
+ if (cpi->twopass.sr_default_decay_limit < 0.25)
+ cpi->twopass.sr_default_decay_limit = 0.25;
+ // If the default changes this will need to change.
+ else if (cpi->twopass.sr_default_decay_limit > 1.33)
+ cpi->twopass.sr_default_decay_limit = 1.33;
+
+ cpi->twopass.sr_diff_factor =
+ (double)cfg->sr_diff_factor.num / (double)cfg->sr_diff_factor.den;
+ if (cpi->twopass.sr_diff_factor < 0.25)
+ cpi->twopass.sr_diff_factor = 0.25;
+ else if (cpi->twopass.sr_diff_factor > 4.0)
+ cpi->twopass.sr_diff_factor = 4.0;
+
+ cpi->twopass.kf_err_per_mb = (double)cfg->kf_err_per_mb_factor.num /
+ (double)cfg->kf_err_per_mb_factor.den;
+ if (cpi->twopass.kf_err_per_mb < 0.25)
+ cpi->twopass.kf_err_per_mb = 0.25;
+ else if (cpi->twopass.kf_err_per_mb > 4.0)
+ cpi->twopass.kf_err_per_mb = 4.0;
+
+ cpi->twopass.kf_frame_min_boost = (double)cfg->kf_frame_min_boost_factor.num /
+ (double)cfg->kf_frame_min_boost_factor.den;
+ if (cpi->twopass.kf_frame_min_boost < 0.25)
+ cpi->twopass.kf_frame_min_boost = 0.25;
+ else if (cpi->twopass.kf_frame_min_boost > 4.0)
+ cpi->twopass.kf_frame_min_boost = 4.0;
+
+ cpi->twopass.kf_frame_max_boost_first =
+ (double)cfg->kf_frame_max_boost_first_factor.num /
+ (double)cfg->kf_frame_max_boost_first_factor.den;
+ if (cpi->twopass.kf_frame_max_boost_first < 0.25)
+ cpi->twopass.kf_frame_max_boost_first = 0.25;
+ else if (cpi->twopass.kf_frame_max_boost_first > 4.0)
+ cpi->twopass.kf_frame_max_boost_first = 4.0;
+
+ cpi->twopass.kf_frame_max_boost_subs =
+ (double)cfg->kf_frame_max_boost_subs_factor.num /
+ (double)cfg->kf_frame_max_boost_subs_factor.den;
+ if (cpi->twopass.kf_frame_max_boost_subs < 0.25)
+ cpi->twopass.kf_frame_max_boost_subs = 0.25;
+ else if (cpi->twopass.kf_frame_max_boost_subs > 4.0)
+ cpi->twopass.kf_frame_max_boost_subs = 4.0;
+
+ cpi->twopass.kf_max_total_boost = (double)cfg->kf_max_total_boost_factor.num /
+ (double)cfg->kf_max_total_boost_factor.den;
+ if (cpi->twopass.kf_max_total_boost < 0.25)
+ cpi->twopass.kf_max_total_boost = 0.25;
+ else if (cpi->twopass.kf_max_total_boost > 4.0)
+ cpi->twopass.kf_max_total_boost = 4.0;
+
+ cpi->twopass.gf_max_total_boost = (double)cfg->gf_max_total_boost_factor.num /
+ (double)cfg->gf_max_total_boost_factor.den;
+ if (cpi->twopass.gf_max_total_boost < 0.25)
+ cpi->twopass.gf_max_total_boost = 0.25;
+ else if (cpi->twopass.gf_max_total_boost > 4.0)
+ cpi->twopass.gf_max_total_boost = 4.0;
+
+ cpi->twopass.gf_frame_max_boost = (double)cfg->gf_frame_max_boost_factor.num /
+ (double)cfg->gf_frame_max_boost_factor.den;
+ if (cpi->twopass.gf_frame_max_boost < 0.25)
+ cpi->twopass.gf_frame_max_boost = 0.25;
+ else if (cpi->twopass.gf_frame_max_boost > 4.0)
+ cpi->twopass.gf_frame_max_boost = 4.0;
+
+ cpi->twopass.zm_factor =
+ (double)cfg->zm_factor.num / (double)cfg->zm_factor.den;
+ if (cpi->twopass.zm_factor < 0.25)
+ cpi->twopass.zm_factor = 0.25;
+ else if (cpi->twopass.zm_factor > 2.0)
+ cpi->twopass.zm_factor = 2.0;
+
+ cpi->rd_ctrl.rd_mult_inter_qp_fac = (double)cfg->rd_mult_inter_qp_fac.num /
+ (double)cfg->rd_mult_inter_qp_fac.den;
+ if (cpi->rd_ctrl.rd_mult_inter_qp_fac < 0.25)
+ cpi->rd_ctrl.rd_mult_inter_qp_fac = 0.25;
+ else if (cpi->rd_ctrl.rd_mult_inter_qp_fac > 4.0)
+ cpi->rd_ctrl.rd_mult_inter_qp_fac = 4.0;
+
+ cpi->rd_ctrl.rd_mult_arf_qp_fac =
+ (double)cfg->rd_mult_arf_qp_fac.num / (double)cfg->rd_mult_arf_qp_fac.den;
+ if (cpi->rd_ctrl.rd_mult_arf_qp_fac < 0.25)
+ cpi->rd_ctrl.rd_mult_arf_qp_fac = 0.25;
+ else if (cpi->rd_ctrl.rd_mult_arf_qp_fac > 4.0)
+ cpi->rd_ctrl.rd_mult_arf_qp_fac = 4.0;
+
+ cpi->rd_ctrl.rd_mult_key_qp_fac =
+ (double)cfg->rd_mult_key_qp_fac.num / (double)cfg->rd_mult_key_qp_fac.den;
+ if (cpi->rd_ctrl.rd_mult_key_qp_fac < 0.25)
+ cpi->rd_ctrl.rd_mult_key_qp_fac = 0.25;
+ else if (cpi->rd_ctrl.rd_mult_key_qp_fac > 4.0)
+ cpi->rd_ctrl.rd_mult_key_qp_fac = 4.0;
+
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx,
const vpx_codec_enc_cfg_t *cfg) {
vpx_codec_err_t res;
@@ -664,6 +803,7 @@
if (res == VPX_CODEC_OK) {
ctx->cfg = *cfg;
set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
+ set_twopass_params_from_config(&ctx->cfg, ctx->cpi);
// On profile change, request a key frame
force_key |= ctx->cpi->common.profile != ctx->oxcf.profile;
vp9_change_config(ctx->cpi, &ctx->oxcf);
@@ -690,6 +830,25 @@
return VPX_CODEC_OK;
}
+static vpx_codec_err_t ctrl_get_quantizer_svc_layers(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ int *const arg = va_arg(args, int *);
+ int i;
+ if (arg == NULL) return VPX_CODEC_INVALID_PARAM;
+ for (i = 0; i < VPX_SS_MAX_LAYERS; i++) {
+ arg[i] = ctx->cpi->svc.base_qindex[i];
+ }
+ return VPX_CODEC_OK;
+}
+
+static vpx_codec_err_t ctrl_get_loopfilter_level(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ int *const arg = va_arg(args, int *);
+ if (arg == NULL) return VPX_CODEC_INVALID_PARAM;
+ *arg = ctx->cpi->common.lf.filter_level;
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx,
const struct vp9_extracfg *extra_cfg) {
const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg);
@@ -696,6 +855,7 @@
if (res == VPX_CODEC_OK) {
ctx->extra_cfg = *extra_cfg;
set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg);
+ set_twopass_params_from_config(&ctx->cfg, ctx->cpi);
vp9_change_config(ctx->cpi, &ctx->oxcf);
}
return res;
@@ -886,6 +1046,18 @@
return update_extra_cfg(ctx, &extra_cfg);
}
+static vpx_codec_err_t ctrl_set_rtc_external_ratectrl(vpx_codec_alg_priv_t *ctx,
+ va_list args) {
+ VP9_COMP *const cpi = ctx->cpi;
+ const unsigned int data = va_arg(args, unsigned int);
+ if (data) {
+ cpi->compute_frame_low_motion_onepass = 0;
+ cpi->rc.constrain_gf_key_freq_onepass_vbr = 0;
+ cpi->cyclic_refresh->content_mode = 0;
+ }
+ return VPX_CODEC_OK;
+}
+
static vpx_codec_err_t ctrl_enable_motion_vector_unit_test(
vpx_codec_alg_priv_t *ctx, va_list args) {
struct vp9_extracfg extra_cfg = ctx->extra_cfg;
@@ -940,6 +1112,7 @@
#endif
priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool);
if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR;
+ set_twopass_params_from_config(&priv->cfg, priv->cpi);
}
}
@@ -1744,6 +1917,7 @@
if (oxcf->pass == 2) {
const FRAME_INFO *frame_info = &cpi->frame_info;
vpx_rc_config_t ratectrl_config;
+ vpx_codec_err_t codec_status;
ratectrl_config.frame_width = frame_info->frame_width;
ratectrl_config.frame_height = frame_info->frame_height;
@@ -1755,7 +1929,10 @@
ratectrl_config.frame_rate_num = oxcf->g_timebase.den;
ratectrl_config.frame_rate_den = oxcf->g_timebase.num;
- vp9_extrc_create(funcs, ratectrl_config, ext_ratectrl);
+ codec_status = vp9_extrc_create(funcs, ratectrl_config, ext_ratectrl);
+ if (codec_status != VPX_CODEC_OK) {
+ return codec_status;
+ }
}
return VPX_CODEC_OK;
}
@@ -1812,11 +1989,14 @@
{ VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync },
{ VP9E_SET_DELTA_Q_UV, ctrl_set_delta_q_uv },
{ VP9E_SET_DISABLE_LOOPFILTER, ctrl_set_disable_loopfilter },
+ { VP9E_SET_RTC_EXTERNAL_RATECTRL, ctrl_set_rtc_external_ratectrl },
{ VP9E_SET_EXTERNAL_RATE_CONTROL, ctrl_set_external_rate_control },
// Getters
{ VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer },
{ VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64 },
+ { VP9E_GET_LAST_QUANTIZER_SVC_LAYERS, ctrl_get_quantizer_svc_layers },
+ { VP9E_GET_LOOPFILTER_LEVEL, ctrl_get_loopfilter_level },
{ VP9_GET_REFERENCE, ctrl_get_reference },
{ VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id },
{ VP9E_GET_ACTIVEMAP, ctrl_get_active_map },
@@ -1879,14 +2059,30 @@
VPX_SS_DEFAULT_LAYERS, // ss_number_layers
{ 0 },
- { 0 }, // ss_target_bitrate
- 1, // ts_number_layers
- { 0 }, // ts_target_bitrate
- { 0 }, // ts_rate_decimator
- 0, // ts_periodicity
- { 0 }, // ts_layer_id
- { 0 }, // layer_taget_bitrate
- 0 // temporal_layering_mode
+ { 0 }, // ss_target_bitrate
+ 1, // ts_number_layers
+ { 0 }, // ts_target_bitrate
+ { 0 }, // ts_rate_decimator
+ 0, // ts_periodicity
+ { 0 }, // ts_layer_id
+ { 0 }, // layer_target_bitrate
+ 0, // temporal_layering_mode
+ 0, // use_vizier_rc_params
+ { 1, 1 }, // active_wq_factor
+ { 1, 1 }, // err_per_mb_factor
+ { 1, 1 }, // sr_default_decay_limit
+ { 1, 1 }, // sr_diff_factor
+ { 1, 1 }, // kf_err_per_mb_factor
+ { 1, 1 }, // kf_frame_min_boost_factor
+ { 1, 1 }, // kf_frame_max_boost_first_factor
+ { 1, 1 }, // kf_frame_max_boost_subs_factor
+ { 1, 1 }, // kf_max_total_boost_factor
+ { 1, 1 }, // gf_max_total_boost_factor
+ { 1, 1 }, // gf_frame_max_boost_factor
+ { 1, 1 }, // zm_factor
+ { 1, 1 }, // rd_mult_inter_qp_fac
+ { 1, 1 }, // rd_mult_arf_qp_fac
+ { 1, 1 }, // rd_mult_key_qp_fac
} },
};
@@ -2105,11 +2301,6 @@
DUMP_STRUCT_VALUE(fp, oxcf, target_level);
// TODO(angiebird): dump two_pass_stats_in
-
-#if CONFIG_FP_MB_STATS
- // TODO(angiebird): dump firstpass_mb_stats_in
-#endif
-
DUMP_STRUCT_VALUE(fp, oxcf, tuning);
DUMP_STRUCT_VALUE(fp, oxcf, content);
#if CONFIG_VP9_HIGHBITDEPTH
@@ -2123,6 +2314,8 @@
DUMP_STRUCT_VALUE(fp, oxcf, row_mt);
DUMP_STRUCT_VALUE(fp, oxcf, motion_vector_unit_test);
+ DUMP_STRUCT_VALUE(fp, oxcf, delta_q_uv);
+ DUMP_STRUCT_VALUE(fp, oxcf, use_simple_encode_api);
}
FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) {
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -283,7 +283,7 @@
vpx_codec_enc_cfg_t cfg;
} vpx_codec_enc_cfg_map_t;
-/*!\brief Decoder algorithm interface interface
+/*!\brief Decoder algorithm interface
*
* All decoders \ref MUST expose a variable of this type.
*/
--- /dev/null
+++ b/vpx/internal/vpx_ratectrl_rtc.h
@@ -1,0 +1,62 @@
+/*
+ * Copyright (c) 2021 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VPX_VPX_RATECTRL_RTC_H_
+#define VPX_VPX_RATECTRL_RTC_H_
+
+#include "vpx/vpx_encoder.h"
+
+namespace libvpx {
+struct VpxRateControlRtcConfig {
+ public:
+ VpxRateControlRtcConfig() {
+ width = 1280;
+ height = 720;
+ max_quantizer = 63;
+ min_quantizer = 2;
+ target_bandwidth = 1000;
+ buf_initial_sz = 600;
+ buf_optimal_sz = 600;
+ buf_sz = 1000;
+ undershoot_pct = overshoot_pct = 50;
+ max_intra_bitrate_pct = 50;
+ max_inter_bitrate_pct = 0;
+ framerate = 30.0;
+ ts_number_layers = 1;
+ rc_mode = VPX_CBR;
+ aq_mode = 0;
+ layer_target_bitrate[0] = static_cast<int>(target_bandwidth);
+ ts_rate_decimator[0] = 1;
+ }
+
+ int width;
+ int height;
+ // 0-63
+ int max_quantizer;
+ int min_quantizer;
+ int64_t target_bandwidth;
+ int64_t buf_initial_sz;
+ int64_t buf_optimal_sz;
+ int64_t buf_sz;
+ int undershoot_pct;
+ int overshoot_pct;
+ int max_intra_bitrate_pct;
+ int max_inter_bitrate_pct;
+ double framerate;
+ // Number of temporal layers
+ int ts_number_layers;
+ int layer_target_bitrate[VPX_MAX_LAYERS];
+ int ts_rate_decimator[VPX_TS_MAX_LAYERS];
+ // vbr, cbr
+ enum vpx_rc_mode rc_mode;
+ int aq_mode;
+};
+} // namespace libvpx
+#endif
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -8,6 +8,7 @@
* be found in the AUTHORS file in the root of the source tree.
*/
+#include <limits.h>
#include <stdlib.h>
#include <string.h>
@@ -22,8 +23,10 @@
unsigned char *img_data) {
unsigned int h, w, s, xcs, ycs, bps;
unsigned int stride_in_bytes;
- int align;
+ unsigned int align;
+ if (img != NULL) memset(img, 0, sizeof(vpx_image_t));
+
/* Treat align==0 like align==1 */
if (!buf_align) buf_align = 1;
@@ -88,8 +91,6 @@
if (!img) goto fail;
img->self_allocd = 1;
- } else {
- memset(img, 0, sizeof(vpx_image_t));
}
img->img_data = img_data;
@@ -152,9 +153,8 @@
int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y,
unsigned int w, unsigned int h) {
- unsigned char *data;
-
- if (x + w <= img->w && y + h <= img->h) {
+ if (x <= UINT_MAX - w && x + w <= img->w && y <= UINT_MAX - h &&
+ y + h <= img->h) {
img->d_w = w;
img->d_h = h;
@@ -165,7 +165,7 @@
} else {
const int bytes_per_sample =
(img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1;
- data = img->img_data;
+ unsigned char *data = img->img_data;
if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) {
img->planes[VPX_PLANE_ALPHA] =
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -712,6 +712,47 @@
* Supported in codecs: VP9
*/
VP9E_SET_EXTERNAL_RATE_CONTROL,
+
+ /*!\brief Codec control to disable internal features in rate control.
+ *
+ * This will do 3 things, only for 1 pass:
+ * - Turn off low motion computation
+ * - Turn off gf update constraint on key frame frequency
+ * - Turn off content mode for cyclic refresh
+ *
+ * With those, the rate control is expected to work exactly the same as the
+ * interface provided in ratectrl_rtc.cc/h
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_SET_RTC_EXTERNAL_RATECTRL,
+
+ /*!\brief Codec control function to get loopfilter level in the encoder.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_GET_LOOPFILTER_LEVEL,
+
+ /*!\brief Codec control to get last quantizers for all spatial layers.
+ *
+ * Return value uses an array of internal quantizers scale defined by the
+ * codec, for all spatial layers.
+ * The size of the array passed in should be #VPX_SS_MAX_LAYERS.
+ *
+ * Supported in codecs: VP9
+ */
+ VP9E_GET_LAST_QUANTIZER_SVC_LAYERS,
+
+ /*!\brief Codec control to disable internal features in rate control.
+ *
+ * This will turn off cyclic refresh for vp8.
+ *
+ * With this, the rate control is expected to work exactly the same as the
+ * interface provided in vp8_ratectrl_rtc.cc/h
+ *
+ * Supported in codecs: VP8
+ */
+ VP8E_SET_RTC_EXTERNAL_RATECTRL,
};
/*!\brief vpx 1-D scaling mode
@@ -767,8 +808,8 @@
unsigned int rows; /**< Number of rows. */
unsigned int cols; /**< Number of columns. */
/*! VP8 only uses the first 4 segments. VP9 uses 8 segments. */
- int delta_q[8]; /**< Quantizer deltas. */
- int delta_lf[8]; /**< Loop filter deltas. */
+ int delta_q[8]; /**< Quantizer deltas. Valid range: [-63, 63]. */
+ int delta_lf[8]; /**< Loop filter deltas. Valid range: [-63, 63]. */
/*! skip and ref frame segment is only used in VP9. */
int skip[8]; /**< Skip this block. */
int ref_frame[8]; /**< Reference frame for this block. */
@@ -969,6 +1010,9 @@
#define VPX_CTRL_VP8E_GET_LAST_QUANTIZER
VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *)
#define VPX_CTRL_VP8E_GET_LAST_QUANTIZER_64
+VPX_CTRL_USE_TYPE(VP9E_GET_LAST_QUANTIZER_SVC_LAYERS, int *)
+#define VPX_CTRL_VP9E_GET_LAST_QUANTIZER_SVC_LAYERS
+
VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *)
#define VPX_CTRL_VP9E_GET_SVC_LAYER_ID
@@ -1037,6 +1081,9 @@
VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *)
#define VPX_CTRL_VP9E_GET_LEVEL
+VPX_CTRL_USE_TYPE(VP9E_GET_LOOPFILTER_LEVEL, int *)
+#define VPX_CTRL_VP9E_GET_LOOPFILTER_LEVEL
+
VPX_CTRL_USE_TYPE(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, unsigned int)
#define VPX_CTRL_VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST
@@ -1067,6 +1114,12 @@
VPX_CTRL_USE_TYPE(VP9E_SET_DISABLE_LOOPFILTER, int)
#define VPX_CTRL_VP9E_SET_DISABLE_LOOPFILTER
+
+VPX_CTRL_USE_TYPE(VP9E_SET_RTC_EXTERNAL_RATECTRL, int)
+#define VPX_CTRL_VP9E_SET_RTC_EXTERNAL_RATECTRL
+
+VPX_CTRL_USE_TYPE(VP8E_SET_RTC_EXTERNAL_RATECTRL, int)
+#define VPX_CTRL_VP8E_SET_RTC_EXTERNAL_RATECTRL
VPX_CTRL_USE_TYPE(VP9E_SET_EXTERNAL_RATE_CONTROL, vpx_rc_funcs_t *)
#define VPX_CTRL_VP9E_SET_EXTERNAL_RATE_CONTROL
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -33,6 +33,7 @@
API_SRCS-yes += src/vpx_encoder.c
API_SRCS-yes += vpx_encoder.h
API_SRCS-yes += internal/vpx_codec_internal.h
+API_SRCS-yes += internal/vpx_ratectrl_rtc.h
API_SRCS-yes += src/vpx_codec.c
API_SRCS-yes += src/vpx_image.c
API_SRCS-yes += vpx_codec.h
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -58,7 +58,7 @@
* fields to structures
*/
#define VPX_ENCODER_ABI_VERSION \
- (14 + VPX_CODEC_ABI_VERSION + \
+ (15 + VPX_CODEC_ABI_VERSION + \
VPX_EXT_RATECTRL_ABI_VERSION) /**<\hideinitializer*/
/*! \brief Encoder capabilities bitfield
@@ -457,7 +457,7 @@
/*!\brief Target data rate
*
- * Target bandwidth to use for this stream, in kilobits per second.
+ * Target bitrate to use for this stream, in kilobits per second.
*/
unsigned int rc_target_bitrate;
@@ -498,7 +498,7 @@
* undershoot level (current rate vs target) beyond which more aggressive
* corrective measures are taken.
* *
- * Valid values in the range VP8:0-1000 VP9: 0-100.
+ * Valid values in the range VP8:0-100 VP9: 0-100.
*/
unsigned int rc_undershoot_pct;
@@ -513,7 +513,7 @@
* overshoot level (current rate vs target) beyond which more aggressive
* corrective measures are taken.
*
- * Valid values in the range VP8:0-1000 VP9: 0-100.
+ * Valid values in the range VP8:0-100 VP9: 0-100.
*/
unsigned int rc_overshoot_pct;
@@ -693,6 +693,151 @@
*
*/
int temporal_layering_mode;
+
+ /*!\brief A flag indicating whether to use external rate control parameters.
+ * Defaults to 0. If set to 1, the following parameters will be used in the
+ * rate control system.
+ */
+ int use_vizier_rc_params;
+
+ /*!\brief Active worst quality factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t active_wq_factor;
+
+ /*!\brief Error per macroblock adjustment factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t err_per_mb_factor;
+
+ /*!\brief Second reference default decay limit.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t sr_default_decay_limit;
+
+ /*!\brief Second reference difference factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t sr_diff_factor;
+
+ /*!\brief Keyframe error per macroblock adjustment factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t kf_err_per_mb_factor;
+
+ /*!\brief Keyframe minimum boost adjustment factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t kf_frame_min_boost_factor;
+
+ /*!\brief Keyframe maximum boost adjustment factor, for the first keyframe
+ * in a chunk.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t kf_frame_max_boost_first_factor;
+
+ /*!\brief Keyframe maximum boost adjustment factor, for subsequent keyframes.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t kf_frame_max_boost_subs_factor;
+
+ /*!\brief Keyframe maximum total boost adjustment factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t kf_max_total_boost_factor;
+
+ /*!\brief Golden frame maximum total boost adjustment factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t gf_max_total_boost_factor;
+
+ /*!\brief Golden frame maximum boost adjustment factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t gf_frame_max_boost_factor;
+
+ /*!\brief Zero motion power factor.
+ *
+ * Rate control parameters, set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t zm_factor;
+
+ /*!\brief Rate-distortion multiplier for inter frames.
+ * The multiplier is a crucial parameter in the calculation of rate distortion
+ * cost. It is often related to the qp (qindex) value.
+ * Rate control parameters, could be set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t rd_mult_inter_qp_fac;
+
+ /*!\brief Rate-distortion multiplier for alt-ref frames.
+ * The multiplier is a crucial parameter in the calculation of rate distortion
+ * cost. It is often related to the qp (qindex) value.
+ * Rate control parameters, could be set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t rd_mult_arf_qp_fac;
+
+ /*!\brief Rate-distortion multiplier for key frames.
+ * The multiplier is a crucial parameter in the calculation of rate distortion
+ * cost. It is often related to the qp (qindex) value.
+ * Rate control parameters, could be set from external experiment results.
+ * The passed-in value is used only when |use_vizier_rc_params| is set to 1;
+ * otherwise, the default value is used.
+ *
+ */
+ vpx_rational_t rd_mult_key_qp_fac;
} vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */
/*!\brief vp9 svc extra configure parameters
--- a/vpx/vpx_ext_ratectrl.h
+++ b/vpx/vpx_ext_ratectrl.h
@@ -38,9 +38,15 @@
*
* The encoder will receive the decision from the external rate control model
* through get_encodeframe_decision() defined in vpx_rc_funcs_t.
+ *
+ * If max_frame_size = 0, the encoder ignores the max frame size limit.
+ * If max_frame_size = -1, the encoder uses VP9's max frame size as the limit.
+ * If the encoded frame size is larger than max_frame_size, the frame is
+ * recoded to meet the size limit, following VP9's recoding principles.
*/
typedef struct vpx_rc_encodeframe_decision {
- int q_index; /**< Quantizer step index [0..255]*/
+ int q_index; /**< Quantizer step index [0..255]*/
+ int max_frame_size; /**< Maximal frame size allowed to encode a frame*/
} vpx_rc_encodeframe_decision_t;
/*!\brief Information for the frame to be encoded.
@@ -82,6 +88,7 @@
int64_t sse; /**< sum of squared error of the reconstructed frame */
int64_t bit_count; /**< number of bits spent on coding the frame*/
int64_t pixel_count; /**< number of pixels in YUV planes of the frame*/
+ int actual_encoding_qindex; /**< the actual qindex used to encode the frame*/
} vpx_rc_encodeframe_result_t;
/*!\brief Status returned by rate control callback functions.
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -171,7 +171,8 @@
/*!\brief Set the rectangle identifying the displayed portion of the image
*
* Updates the displayed rectangle (aka viewport) on the image surface to
- * match the specified coordinates and size.
+ * match the specified coordinates and size. Specifically, sets img->d_w,
+ * img->d_h, and elements of the img->planes[] array.
*
* \param[in] img Image descriptor
* \param[in] x leftmost column
@@ -179,7 +180,7 @@
* \param[in] w width
* \param[in] h height
*
- * \return 0 if the requested rectangle is valid, nonzero otherwise.
+ * \return 0 if the requested rectangle is valid, nonzero (-1) otherwise.
*/
int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y,
unsigned int w, unsigned int h);
--- a/vpx_dsp/arm/avg_neon.c
+++ b/vpx_dsp/arm/avg_neon.c
@@ -22,8 +22,7 @@
uint32_t vpx_avg_4x4_neon(const uint8_t *a, int a_stride) {
const uint8x16_t b = load_unaligned_u8q(a, a_stride);
const uint16x8_t c = vaddl_u8(vget_low_u8(b), vget_high_u8(b));
- const uint32x2_t d = horizontal_add_uint16x8(c);
- return vget_lane_u32(vrshr_n_u32(d, 4), 0);
+ return (horizontal_add_uint16x8(c) + (1 << 3)) >> 4;
}
uint32_t vpx_avg_8x8_neon(const uint8_t *a, int a_stride) {
@@ -30,7 +29,6 @@
int i;
uint8x8_t b, c;
uint16x8_t sum;
- uint32x2_t d;
b = vld1_u8(a);
a += a_stride;
c = vld1_u8(a);
@@ -43,9 +41,7 @@
sum = vaddw_u8(sum, d);
}
- d = horizontal_add_uint16x8(sum);
-
- return vget_lane_u32(vrshr_n_u32(d, 6), 0);
+ return (horizontal_add_uint16x8(sum) + (1 << 5)) >> 6;
}
// coeff: 16 bits, dynamic range [-32640, 32640].
@@ -139,8 +135,7 @@
ref += 16;
}
- return vget_lane_s16(vreinterpret_s16_u32(horizontal_add_uint16x8(vec_sum)),
- 0);
+ return (int16_t)horizontal_add_uint16x8(vec_sum);
}
// ref, src = [0, 510] - max diff = 16-bits
--- a/vpx_dsp/arm/fdct_partial_neon.c
+++ b/vpx_dsp/arm/fdct_partial_neon.c
@@ -15,19 +15,10 @@
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/sum_neon.h"
-static INLINE tran_low_t get_lane(const int32x2_t a) {
-#if CONFIG_VP9_HIGHBITDEPTH
- return vget_lane_s32(a, 0);
-#else
- return vget_lane_s16(vreinterpret_s16_s32(a), 0);
-#endif // CONFIG_VP9_HIGHBITDETPH
-}
-
void vpx_fdct4x4_1_neon(const int16_t *input, tran_low_t *output, int stride) {
int16x4_t a0, a1, a2, a3;
int16x8_t b0, b1;
int16x8_t c;
- int32x2_t d;
a0 = vld1_s16(input);
input += stride;
@@ -42,9 +33,7 @@
c = vaddq_s16(b0, b1);
- d = horizontal_add_int16x8(c);
-
- output[0] = get_lane(vshl_n_s32(d, 1));
+ output[0] = (tran_low_t)(horizontal_add_int16x8(c) << 1);
output[1] = 0;
}
@@ -57,7 +46,7 @@
sum = vaddq_s16(sum, input_00);
}
- output[0] = get_lane(horizontal_add_int16x8(sum));
+ output[0] = (tran_low_t)horizontal_add_int16x8(sum);
output[1] = 0;
}
@@ -66,7 +55,7 @@
int r;
int16x8_t left = vld1q_s16(input);
int16x8_t right = vld1q_s16(input + 8);
- int32x2_t sum;
+ int32_t sum;
input += stride;
for (r = 1; r < 16; ++r) {
@@ -77,9 +66,9 @@
right = vaddq_s16(right, b);
}
- sum = vadd_s32(horizontal_add_int16x8(left), horizontal_add_int16x8(right));
+ sum = horizontal_add_int16x8(left) + horizontal_add_int16x8(right);
- output[0] = get_lane(vshr_n_s32(sum, 1));
+ output[0] = (tran_low_t)(sum >> 1);
output[1] = 0;
}
@@ -90,7 +79,7 @@
int16x8_t a1 = vld1q_s16(input + 8);
int16x8_t a2 = vld1q_s16(input + 16);
int16x8_t a3 = vld1q_s16(input + 24);
- int32x2_t sum;
+ int32_t sum;
input += stride;
for (r = 1; r < 32; ++r) {
@@ -105,9 +94,10 @@
a3 = vaddq_s16(a3, b3);
}
- sum = vadd_s32(horizontal_add_int16x8(a0), horizontal_add_int16x8(a1));
- sum = vadd_s32(sum, horizontal_add_int16x8(a2));
- sum = vadd_s32(sum, horizontal_add_int16x8(a3));
- output[0] = get_lane(vshr_n_s32(sum, 3));
+ sum = horizontal_add_int16x8(a0);
+ sum += horizontal_add_int16x8(a1);
+ sum += horizontal_add_int16x8(a2);
+ sum += horizontal_add_int16x8(a3);
+ output[0] = (tran_low_t)(sum >> 3);
output[1] = 0;
}
--- a/vpx_dsp/arm/highbd_loopfilter_neon.c
+++ b/vpx_dsp/arm/highbd_loopfilter_neon.c
@@ -661,6 +661,17 @@
vpx_highbd_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, bd);
}
+// Quiet warnings of the form: 'vpx_dsp/arm/highbd_loopfilter_neon.c|675 col 67|
+// warning: 'oq1' may be used uninitialized in this function
+// [-Wmaybe-uninitialized]', for oq1-op1. Without reworking the code or adding
+// an additional branch this warning cannot be silenced otherwise. The
+// loopfilter is only called when needed for a block so these output pixels
+// will be set.
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
static void lpf_horizontal_16_kernel(uint16_t *s, int p,
const uint16x8_t blimit_vec,
const uint16x8_t limit_vec,
@@ -722,6 +733,10 @@
store_4x8(s - 2, p, op1, op0, oq0, oq1);
}
}
+
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
void vpx_highbd_lpf_horizontal_16_neon(uint16_t *s, int p,
const uint8_t *blimit,
--- a/vpx_dsp/arm/loopfilter_neon.c
+++ b/vpx_dsp/arm/loopfilter_neon.c
@@ -975,6 +975,17 @@
FUN_LPF_16_KERNEL(_dual_, 16) // lpf_16_dual_kernel
#undef FUN_LPF_16_KERNEL
+// Quiet warnings of the form: 'vpx_dsp/arm/loopfilter_neon.c|981 col 42|
+// warning: 'oq1' may be used uninitialized in this function
+// [-Wmaybe-uninitialized]', for oq1-op1. Without reworking the code or adding
+// an additional branch this warning cannot be silenced otherwise. The
+// loopfilter is only called when needed for a block so these output pixels
+// will be set.
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, const uint8_t *blimit,
const uint8_t *limit, const uint8_t *thresh) {
uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6,
@@ -1090,3 +1101,7 @@
vget_high_u8(oq0), vget_high_u8(oq1));
}
}
+
+#if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__clang__)
+#pragma GCC diagnostic pop
+#endif
--- a/vpx_dsp/arm/mem_neon.h
+++ b/vpx_dsp/arm/mem_neon.h
@@ -19,6 +19,24 @@
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
+// Support for these xN intrinsics is lacking in older versions of GCC.
+#if defined(__GNUC__) && !defined(__clang__)
+#if __GNUC__ < 8 || defined(__arm__)
+static INLINE uint8x16x2_t vld1q_u8_x2(uint8_t const *ptr) {
+ uint8x16x2_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16) } };
+ return res;
+}
+#endif
+
+#if __GNUC__ < 9 || defined(__arm__)
+static INLINE uint8x16x3_t vld1q_u8_x3(uint8_t const *ptr) {
+ uint8x16x3_t res = { { vld1q_u8(ptr + 0 * 16), vld1q_u8(ptr + 1 * 16),
+ vld1q_u8(ptr + 2 * 16) } };
+ return res;
+}
+#endif
+#endif
+
static INLINE int16x4_t create_s16x4_neon(const int16_t c0, const int16_t c1,
const int16_t c2, const int16_t c3) {
return vcreate_s16((uint16_t)c0 | ((uint32_t)c1 << 16) |
@@ -95,7 +113,8 @@
}
// Load 2 sets of 4 bytes when alignment is not guaranteed.
-static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) {
+static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf,
+ ptrdiff_t stride) {
uint32_t a;
uint32x2_t a_u32 = vdup_n_u32(0);
if (stride == 4) return vld1_u8(buf);
@@ -108,7 +127,7 @@
}
// Store 2 sets of 4 bytes when alignment is not guaranteed.
-static INLINE void store_unaligned_u8(uint8_t *buf, int stride,
+static INLINE void store_unaligned_u8(uint8_t *buf, ptrdiff_t stride,
const uint8x8_t a) {
const uint32x2_t a_u32 = vreinterpret_u32_u8(a);
if (stride == 4) {
@@ -121,7 +140,8 @@
}
// Load 4 sets of 4 bytes when alignment is not guaranteed.
-static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) {
+static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf,
+ ptrdiff_t stride) {
uint32_t a;
uint32x4_t a_u32 = vdupq_n_u32(0);
if (stride == 4) return vld1q_u8(buf);
@@ -141,7 +161,7 @@
}
// Store 4 sets of 4 bytes when alignment is not guaranteed.
-static INLINE void store_unaligned_u8q(uint8_t *buf, int stride,
+static INLINE void store_unaligned_u8q(uint8_t *buf, ptrdiff_t stride,
const uint8x16_t a) {
const uint32x4_t a_u32 = vreinterpretq_u32_u8(a);
if (stride == 4) {
@@ -158,7 +178,7 @@
}
// Load 2 sets of 4 bytes when alignment is guaranteed.
-static INLINE uint8x8_t load_u8(const uint8_t *buf, int stride) {
+static INLINE uint8x8_t load_u8(const uint8_t *buf, ptrdiff_t stride) {
uint32x2_t a = vdup_n_u32(0);
assert(!((intptr_t)buf % sizeof(uint32_t)));
@@ -171,7 +191,7 @@
}
// Store 2 sets of 4 bytes when alignment is guaranteed.
-static INLINE void store_u8(uint8_t *buf, int stride, const uint8x8_t a) {
+static INLINE void store_u8(uint8_t *buf, ptrdiff_t stride, const uint8x8_t a) {
uint32x2_t a_u32 = vreinterpret_u32_u8(a);
assert(!((intptr_t)buf % sizeof(uint32_t)));
--- a/vpx_dsp/arm/sad4d_neon.c
+++ b/vpx_dsp/arm/sad4d_neon.c
@@ -34,7 +34,9 @@
uint32_t *const res) {
int i;
uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) };
+#if !defined(__aarch64__)
uint16x4_t a[2];
+#endif
uint32x4_t r;
assert(!((intptr_t)src_ptr % sizeof(uint32_t)));
@@ -51,9 +53,14 @@
abs[1] = vabal_u8(abs[1], s, ref23);
}
+#if defined(__aarch64__)
+ abs[0] = vpaddq_u16(abs[0], abs[1]);
+ r = vpaddlq_u16(abs[0]);
+#else
a[0] = vpadd_u16(vget_low_u16(abs[0]), vget_high_u16(abs[0]));
a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1]));
r = vpaddlq_u16(vcombine_u16(a[0], a[1]));
+#endif
vst1q_u32(res, r);
}
@@ -74,6 +81,12 @@
// Can handle 512 pixels' sad sum (such as 16x32 or 32x16)
static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/,
uint32_t *const res) {
+#if defined(__aarch64__)
+ const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
+ const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
+ const uint16x8_t b0 = vpaddq_u16(a0, a1);
+ const uint32x4_t r = vpaddlq_u16(b0);
+#else
const uint16x4_t a0 = vadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));
const uint16x4_t a1 = vadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));
const uint16x4_t a2 = vadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2]));
@@ -81,12 +94,23 @@
const uint16x4_t b0 = vpadd_u16(a0, a1);
const uint16x4_t b1 = vpadd_u16(a2, a3);
const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1));
+#endif
vst1q_u32(res, r);
}
+#if defined(__arm__) || !defined(__ARM_FEATURE_DOTPROD)
+
// Can handle 1024 pixels' sad sum (such as 32x32)
static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/,
uint32_t *const res) {
+#if defined(__aarch64__)
+ const uint16x8_t a0 = vpaddq_u16(sum[0], sum[1]);
+ const uint16x8_t a1 = vpaddq_u16(sum[2], sum[3]);
+ const uint32x4_t b0 = vpaddlq_u16(a0);
+ const uint32x4_t b1 = vpaddlq_u16(a1);
+ const uint32x4_t r = vpaddq_u32(b0, b1);
+ vst1q_u32(res, r);
+#else
const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0]));
const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1]));
const uint16x4_t a2 = vpadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2]));
@@ -96,15 +120,26 @@
const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0));
const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1));
vst1q_u32(res, vcombine_u32(c0, c1));
+#endif
}
// Can handle 2048 pixels' sad sum (such as 32x64 or 64x32)
static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/,
uint32_t *const res) {
+#if defined(__aarch64__)
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
const uint32x4_t a2 = vpaddlq_u16(sum[2]);
const uint32x4_t a3 = vpaddlq_u16(sum[3]);
+ const uint32x4_t b0 = vpaddq_u32(a0, a1);
+ const uint32x4_t b1 = vpaddq_u32(a2, a3);
+ const uint32x4_t r = vpaddq_u32(b0, b1);
+ vst1q_u32(res, r);
+#else
+ const uint32x4_t a0 = vpaddlq_u16(sum[0]);
+ const uint32x4_t a1 = vpaddlq_u16(sum[1]);
+ const uint32x4_t a2 = vpaddlq_u16(sum[2]);
+ const uint32x4_t a3 = vpaddlq_u16(sum[3]);
const uint32x2_t b0 = vadd_u32(vget_low_u32(a0), vget_high_u32(a0));
const uint32x2_t b1 = vadd_u32(vget_low_u32(a1), vget_high_u32(a1));
const uint32x2_t b2 = vadd_u32(vget_low_u32(a2), vget_high_u32(a2));
@@ -112,11 +147,13 @@
const uint32x2_t c0 = vpadd_u32(b0, b1);
const uint32x2_t c1 = vpadd_u32(b2, b3);
vst1q_u32(res, vcombine_u32(c0, c1));
+#endif
}
// Can handle 4096 pixels' sad sum (such as 64x64)
static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/,
uint32_t *const res) {
+#if defined(__aarch64__)
const uint32x4_t a0 = vpaddlq_u16(sum[0]);
const uint32x4_t a1 = vpaddlq_u16(sum[1]);
const uint32x4_t a2 = vpaddlq_u16(sum[2]);
@@ -129,6 +166,23 @@
const uint32x4_t b1 = vaddq_u32(a2, a3);
const uint32x4_t b2 = vaddq_u32(a4, a5);
const uint32x4_t b3 = vaddq_u32(a6, a7);
+ const uint32x4_t c0 = vpaddq_u32(b0, b1);
+ const uint32x4_t c1 = vpaddq_u32(b2, b3);
+ const uint32x4_t r = vpaddq_u32(c0, c1);
+ vst1q_u32(res, r);
+#else
+ const uint32x4_t a0 = vpaddlq_u16(sum[0]);
+ const uint32x4_t a1 = vpaddlq_u16(sum[1]);
+ const uint32x4_t a2 = vpaddlq_u16(sum[2]);
+ const uint32x4_t a3 = vpaddlq_u16(sum[3]);
+ const uint32x4_t a4 = vpaddlq_u16(sum[4]);
+ const uint32x4_t a5 = vpaddlq_u16(sum[5]);
+ const uint32x4_t a6 = vpaddlq_u16(sum[6]);
+ const uint32x4_t a7 = vpaddlq_u16(sum[7]);
+ const uint32x4_t b0 = vaddq_u32(a0, a1);
+ const uint32x4_t b1 = vaddq_u32(a2, a3);
+ const uint32x4_t b2 = vaddq_u32(a4, a5);
+ const uint32x4_t b3 = vaddq_u32(a6, a7);
const uint32x2_t c0 = vadd_u32(vget_low_u32(b0), vget_high_u32(b0));
const uint32x2_t c1 = vadd_u32(vget_low_u32(b1), vget_high_u32(b1));
const uint32x2_t c2 = vadd_u32(vget_low_u32(b2), vget_high_u32(b2));
@@ -136,8 +190,11 @@
const uint32x2_t d0 = vpadd_u32(c0, c1);
const uint32x2_t d1 = vpadd_u32(c2, c3);
vst1q_u32(res, vcombine_u32(d0, d1));
+#endif
}
+#endif
+
static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
@@ -180,7 +237,42 @@
////////////////////////////////////////////////////////////////////////////////
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \
+ (__ARM_FEATURE_DOTPROD == 1)
+
static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,
+ uint32x4_t *const sum) {
+ const uint8x16_t r = vld1q_u8(ref_ptr);
+ const uint8x16_t diff = vabdq_u8(src_ptr, r);
+ *sum = vdotq_u32(*sum, diff, vdupq_n_u8(1));
+}
+
+static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
+ uint32_t *res, const int height) {
+ int i;
+ uint32x4_t r0, r1;
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
+ uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+
+ for (i = 0; i < height; ++i) {
+ const uint8x16_t s = vld1q_u8(src_ptr + i * src_stride);
+ sad16_neon(ref_loop[0] + i * ref_stride, s, &sum[0]);
+ sad16_neon(ref_loop[1] + i * ref_stride, s, &sum[1]);
+ sad16_neon(ref_loop[2] + i * ref_stride, s, &sum[2]);
+ sad16_neon(ref_loop[3] + i * ref_stride, s, &sum[3]);
+ }
+
+ r0 = vpaddq_u32(sum[0], sum[1]);
+ r1 = vpaddq_u32(sum[2], sum[3]);
+ vst1q_u32(res, vpaddq_u32(r0, r1));
+}
+
+#else
+
+static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr,
uint16x8_t *const sum) {
const uint8x16_t r = vld1q_u8(ref_ptr);
*sum = vabal_u8(*sum, vget_low_u8(src_ptr), vget_low_u8(r));
@@ -190,7 +282,7 @@
static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res, const int height) {
- int i, j;
+ int i;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
ref_array[3] };
uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
@@ -199,15 +291,22 @@
for (i = 0; i < height; ++i) {
const uint8x16_t s = vld1q_u8(src_ptr);
src_ptr += src_stride;
- for (j = 0; j < 4; ++j) {
- sad16_neon(ref_loop[j], s, &sum[j]);
- ref_loop[j] += ref_stride;
- }
+ /* Manual unrolling here stops the compiler from getting confused. */
+ sad16_neon(ref_loop[0], s, &sum[0]);
+ ref_loop[0] += ref_stride;
+ sad16_neon(ref_loop[1], s, &sum[1]);
+ ref_loop[1] += ref_stride;
+ sad16_neon(ref_loop[2], s, &sum[2]);
+ ref_loop[2] += ref_stride;
+ sad16_neon(ref_loop[3], s, &sum[3]);
+ ref_loop[3] += ref_stride;
}
sad_512_pel_final_neon(sum, res);
}
+#endif
+
void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
@@ -228,8 +327,69 @@
////////////////////////////////////////////////////////////////////////////////
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \
+ (__ARM_FEATURE_DOTPROD == 1)
+
static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
+ uint32_t *res, const int height) {
+ int i;
+ uint32x4_t r0, r1;
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
+
+ uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+
+ for (i = 0; i < height; ++i) {
+ uint8x16_t s;
+
+ s = vld1q_u8(src_ptr + 0 * 16);
+ sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);
+
+ s = vld1q_u8(src_ptr + 1 * 16);
+ sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);
+
+ src_ptr += src_stride;
+ ref_loop[0] += ref_stride;
+ ref_loop[1] += ref_stride;
+ ref_loop[2] += ref_stride;
+ ref_loop[3] += ref_stride;
+ }
+
+ r0 = vpaddq_u32(sum[0], sum[1]);
+ r1 = vpaddq_u32(sum[2], sum[3]);
+ vst1q_u32(res, vpaddq_u32(r0, r1));
+}
+
+void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
+ uint32_t *res) {
+ sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16);
+}
+
+void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
+ uint32_t *res) {
+ sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32);
+}
+
+void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
+ uint32_t *res) {
+ sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 64);
+}
+
+#else
+
+static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
const int height, uint16x8_t *const sum) {
int i;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
@@ -284,14 +444,124 @@
sad_2048_pel_final_neon(sum, res);
}
-////////////////////////////////////////////////////////////////////////////////
-
+#endif
+
+////////////////////////////////////////////////////////////////////////////////
+
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \
+ (__ARM_FEATURE_DOTPROD == 1)
+
void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *const ref_array[4], int ref_stride,
uint32_t *res) {
int i;
+ uint32x4_t r0, r1;
const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
ref_array[3] };
+ uint32x4_t sum[4] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0) };
+
+ for (i = 0; i < 32; ++i) {
+ uint8x16_t s;
+
+ s = vld1q_u8(src_ptr + 0 * 16);
+ sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]);
+
+ s = vld1q_u8(src_ptr + 1 * 16);
+ sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]);
+
+ s = vld1q_u8(src_ptr + 2 * 16);
+ sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]);
+
+ s = vld1q_u8(src_ptr + 3 * 16);
+ sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]);
+
+ src_ptr += src_stride;
+ ref_loop[0] += ref_stride;
+ ref_loop[1] += ref_stride;
+ ref_loop[2] += ref_stride;
+ ref_loop[3] += ref_stride;
+ }
+
+ r0 = vpaddq_u32(sum[0], sum[1]);
+ r1 = vpaddq_u32(sum[2], sum[3]);
+ vst1q_u32(res, vpaddq_u32(r0, r1));
+}
+
+void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
+ uint32_t *res) {
+ int i;
+ uint32x4_t r0, r1, r2, r3;
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
+ uint32x4_t sum[8] = { vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0), vdupq_n_u32(0), vdupq_n_u32(0),
+ vdupq_n_u32(0), vdupq_n_u32(0) };
+
+ for (i = 0; i < 64; ++i) {
+ uint8x16_t s;
+
+ s = vld1q_u8(src_ptr + 0 * 16);
+ sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]);
+ sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]);
+
+ s = vld1q_u8(src_ptr + 1 * 16);
+ sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]);
+ sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]);
+ sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]);
+ sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]);
+
+ s = vld1q_u8(src_ptr + 2 * 16);
+ sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]);
+ sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]);
+ sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]);
+
+ s = vld1q_u8(src_ptr + 3 * 16);
+ sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]);
+ sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]);
+ sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]);
+ sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]);
+
+ src_ptr += src_stride;
+ ref_loop[0] += ref_stride;
+ ref_loop[1] += ref_stride;
+ ref_loop[2] += ref_stride;
+ ref_loop[3] += ref_stride;
+ }
+
+ r0 = vpaddq_u32(sum[0], sum[1]);
+ r1 = vpaddq_u32(sum[2], sum[3]);
+ r2 = vpaddq_u32(sum[4], sum[5]);
+ r3 = vpaddq_u32(sum[6], sum[7]);
+ r0 = vpaddq_u32(r0, r1);
+ r1 = vpaddq_u32(r2, r3);
+ vst1q_u32(res, vpaddq_u32(r0, r1));
+}
+
+#else
+
+void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *const ref_array[4], int ref_stride,
+ uint32_t *res) {
+ int i;
+ const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2],
+ ref_array[3] };
uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0),
vdupq_n_u16(0) };
@@ -378,3 +648,5 @@
sad_4096_pel_final_neon(sum, res);
}
+
+#endif
--- a/vpx_dsp/arm/sad_neon.c
+++ b/vpx_dsp/arm/sad_neon.c
@@ -23,7 +23,7 @@
const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride);
uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8));
abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8));
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0);
+ return horizontal_add_uint16x8(abs);
}
uint32_t vpx_sad4x4_avg_neon(const uint8_t *src_ptr, int src_stride,
@@ -35,7 +35,7 @@
const uint8x16_t avg = vrhaddq_u8(ref_u8, second_pred_u8);
uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(avg));
abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg));
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0);
+ return horizontal_add_uint16x8(abs);
}
uint32_t vpx_sad4x8_neon(const uint8_t *src_ptr, int src_stride,
@@ -51,7 +51,7 @@
abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8));
}
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0);
+ return horizontal_add_uint16x8(abs);
}
uint32_t vpx_sad4x8_avg_neon(const uint8_t *src_ptr, int src_stride,
@@ -71,7 +71,7 @@
abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg));
}
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0);
+ return horizontal_add_uint16x8(abs);
}
static INLINE uint16x8_t sad8x(const uint8_t *src_ptr, int src_stride,
@@ -114,7 +114,7 @@
uint32_t vpx_sad8x##n##_neon(const uint8_t *src_ptr, int src_stride, \
const uint8_t *ref_ptr, int ref_stride) { \
const uint16x8_t abs = sad8x(src_ptr, src_stride, ref_ptr, ref_stride, n); \
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \
+ return horizontal_add_uint16x8(abs); \
} \
\
uint32_t vpx_sad8x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \
@@ -122,7 +122,7 @@
const uint8_t *second_pred) { \
const uint16x8_t abs = \
sad8x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \
+ return horizontal_add_uint16x8(abs); \
}
sad8xN(4);
@@ -172,7 +172,7 @@
const uint8_t *ref_ptr, int ref_stride) { \
const uint16x8_t abs = \
sad16x(src_ptr, src_stride, ref_ptr, ref_stride, n); \
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \
+ return horizontal_add_uint16x8(abs); \
} \
\
uint32_t vpx_sad16x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \
@@ -180,7 +180,7 @@
const uint8_t *second_pred) { \
const uint16x8_t abs = \
sad16x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \
+ return horizontal_add_uint16x8(abs); \
}
sad16xN(8);
@@ -240,7 +240,7 @@
const uint8_t *ref_ptr, int ref_stride) { \
const uint16x8_t abs = \
sad32x(src_ptr, src_stride, ref_ptr, ref_stride, n); \
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \
+ return horizontal_add_uint16x8(abs); \
} \
\
uint32_t vpx_sad32x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \
@@ -248,7 +248,7 @@
const uint8_t *second_pred) { \
const uint16x8_t abs = \
sad32x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
- return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \
+ return horizontal_add_uint16x8(abs); \
}
sad32xN(16);
@@ -338,7 +338,7 @@
const uint8_t *ref_ptr, int ref_stride) { \
const uint32x4_t abs = \
sad64x(src_ptr, src_stride, ref_ptr, ref_stride, n); \
- return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \
+ return horizontal_add_uint32x4(abs); \
} \
\
uint32_t vpx_sad64x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \
@@ -346,7 +346,7 @@
const uint8_t *second_pred) { \
const uint32x4_t abs = \
sad64x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \
- return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \
+ return horizontal_add_uint32x4(abs); \
}
sad64xN(32);
--- a/vpx_dsp/arm/sum_neon.h
+++ b/vpx_dsp/arm/sum_neon.h
@@ -16,23 +16,65 @@
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
-static INLINE int32x2_t horizontal_add_int16x8(const int16x8_t a) {
+static INLINE int32_t horizontal_add_int16x8(const int16x8_t a) {
+#if defined(__aarch64__)
+ return vaddlvq_s16(a);
+#else
const int32x4_t b = vpaddlq_s16(a);
const int64x2_t c = vpaddlq_s32(b);
- return vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)),
- vreinterpret_s32_s64(vget_high_s64(c)));
+ const int32x2_t d = vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)),
+ vreinterpret_s32_s64(vget_high_s64(c)));
+ return vget_lane_s32(d, 0);
+#endif
}
-static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) {
+static INLINE uint32_t horizontal_add_uint16x8(const uint16x8_t a) {
+#if defined(__aarch64__)
+ return vaddlvq_u16(a);
+#else
const uint32x4_t b = vpaddlq_u16(a);
const uint64x2_t c = vpaddlq_u32(b);
- return vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
- vreinterpret_u32_u64(vget_high_u64(c)));
+ const uint32x2_t d = vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)),
+ vreinterpret_u32_u64(vget_high_u64(c)));
+ return vget_lane_u32(d, 0);
+#endif
}
-static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) {
+static INLINE int32_t horizontal_add_int32x2(const int32x2_t a) {
+#if defined(__aarch64__)
+ return vaddv_s32(a);
+#else
+ return vget_lane_s32(a, 0) + vget_lane_s32(a, 1);
+#endif
+}
+
+static INLINE uint32_t horizontal_add_uint32x2(const uint32x2_t a) {
+#if defined(__aarch64__)
+ return vaddv_u32(a);
+#else
+ return vget_lane_u32(a, 0) + vget_lane_u32(a, 1);
+#endif
+}
+
+static INLINE int32_t horizontal_add_int32x4(const int32x4_t a) {
+#if defined(__aarch64__)
+ return vaddvq_s32(a);
+#else
+ const int64x2_t b = vpaddlq_s32(a);
+ const int32x2_t c = vadd_s32(vreinterpret_s32_s64(vget_low_s64(b)),
+ vreinterpret_s32_s64(vget_high_s64(b)));
+ return vget_lane_s32(c, 0);
+#endif
+}
+
+static INLINE uint32_t horizontal_add_uint32x4(const uint32x4_t a) {
+#if defined(__aarch64__)
+ return vaddvq_u32(a);
+#else
const uint64x2_t b = vpaddlq_u32(a);
- return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
- vreinterpret_u32_u64(vget_high_u64(b)));
+ const uint32x2_t c = vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)),
+ vreinterpret_u32_u64(vget_high_u64(b)));
+ return vget_lane_u32(c, 0);
+#endif
}
#endif // VPX_VPX_DSP_ARM_SUM_NEON_H_
--- a/vpx_dsp/arm/variance_neon.c
+++ b/vpx_dsp/arm/variance_neon.c
@@ -19,6 +19,100 @@
#include "vpx_dsp/arm/sum_neon.h"
#include "vpx_ports/mem.h"
+#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1)
+
+// Process a block of width 4 four rows at a time.
+static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ int i;
+ uint32x4_t sum_a = vdupq_n_u32(0);
+ uint32x4_t sum_b = vdupq_n_u32(0);
+ uint32x4_t sse_u32 = vdupq_n_u32(0);
+
+ for (i = 0; i < h; i += 4) {
+ const uint8x16_t a = load_unaligned_u8q(src_ptr, src_stride);
+ const uint8x16_t b = load_unaligned_u8q(ref_ptr, ref_stride);
+
+ const uint8x16_t abs_diff = vabdq_u8(a, b);
+ sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);
+
+ sum_a = vdotq_u32(sum_a, a, vdupq_n_u8(1));
+ sum_b = vdotq_u32(sum_b, b, vdupq_n_u8(1));
+
+ src_ptr += 4 * src_stride;
+ ref_ptr += 4 * ref_stride;
+ }
+
+ *sum = horizontal_add_int32x4(vreinterpretq_s32_u32(vsubq_u32(sum_a, sum_b)));
+ *sse = horizontal_add_uint32x4(sse_u32);
+}
+
+// Process a block of any size where the width is divisible by 16.
+static void variance_neon_w16(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int w,
+ int h, uint32_t *sse, int *sum) {
+ int i, j;
+ uint32x4_t sum_a = vdupq_n_u32(0);
+ uint32x4_t sum_b = vdupq_n_u32(0);
+ uint32x4_t sse_u32 = vdupq_n_u32(0);
+
+ for (i = 0; i < h; ++i) {
+ for (j = 0; j < w; j += 16) {
+ const uint8x16_t a = vld1q_u8(src_ptr + j);
+ const uint8x16_t b = vld1q_u8(ref_ptr + j);
+
+ const uint8x16_t abs_diff = vabdq_u8(a, b);
+ sse_u32 = vdotq_u32(sse_u32, abs_diff, abs_diff);
+
+ sum_a = vdotq_u32(sum_a, a, vdupq_n_u8(1));
+ sum_b = vdotq_u32(sum_b, b, vdupq_n_u8(1));
+ }
+ src_ptr += src_stride;
+ ref_ptr += ref_stride;
+ }
+
+ *sum = horizontal_add_int32x4(vreinterpretq_s32_u32(vsubq_u32(sum_a, sum_b)));
+ *sse = horizontal_add_uint32x4(sse_u32);
+}
+
+// Process a block of width 8 two rows at a time.
+static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride,
+ const uint8_t *ref_ptr, int ref_stride, int h,
+ uint32_t *sse, int *sum) {
+ int i = 0;
+ uint32x2_t sum_a = vdup_n_u32(0);
+ uint32x2_t sum_b = vdup_n_u32(0);
+ uint32x2_t sse_lo_u32 = vdup_n_u32(0);
+ uint32x2_t sse_hi_u32 = vdup_n_u32(0);
+
+ do {
+ const uint8x8_t a_0 = vld1_u8(src_ptr);
+ const uint8x8_t a_1 = vld1_u8(src_ptr + src_stride);
+ const uint8x8_t b_0 = vld1_u8(ref_ptr);
+ const uint8x8_t b_1 = vld1_u8(ref_ptr + ref_stride);
+
+ const uint8x8_t abs_diff_0 = vabd_u8(a_0, b_0);
+ const uint8x8_t abs_diff_1 = vabd_u8(a_1, b_1);
+ sse_lo_u32 = vdot_u32(sse_lo_u32, abs_diff_0, abs_diff_0);
+ sse_hi_u32 = vdot_u32(sse_hi_u32, abs_diff_1, abs_diff_1);
+
+ sum_a = vdot_u32(sum_a, a_0, vdup_n_u8(1));
+ sum_b = vdot_u32(sum_b, b_0, vdup_n_u8(1));
+ sum_a = vdot_u32(sum_a, a_1, vdup_n_u8(1));
+ sum_b = vdot_u32(sum_b, b_1, vdup_n_u8(1));
+
+ src_ptr += src_stride + src_stride;
+ ref_ptr += ref_stride + ref_stride;
+ i += 2;
+ } while (i < h);
+
+ *sum = horizontal_add_int32x2(vreinterpret_s32_u32(vsub_u32(sum_a, sum_b)));
+ *sse = horizontal_add_uint32x2(vadd_u32(sse_lo_u32, sse_hi_u32));
+}
+
+#else
+
// The variance helper functions use int16_t for sum. 8 values are accumulated
// and then added (at which point they expand up to int32_t). To avoid overflow,
// there can be no more than 32767 / 255 ~= 128 values accumulated in each
@@ -66,10 +160,9 @@
ref_ptr += 4 * ref_stride;
}
- *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);
- *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32(
- vaddq_s32(sse_lo_s32, sse_hi_s32))),
- 0);
+ *sum = horizontal_add_int16x8(sum_s16);
+ *sse = horizontal_add_uint32x4(
+ vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32)));
}
// Process a block of any size where the width is divisible by 16.
@@ -115,10 +208,9 @@
ref_ptr += ref_stride;
}
- *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);
- *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32(
- vaddq_s32(sse_lo_s32, sse_hi_s32))),
- 0);
+ *sum = horizontal_add_int16x8(sum_s16);
+ *sse = horizontal_add_uint32x4(
+ vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32)));
}
// Process a block of width 8 two rows at a time.
@@ -157,12 +249,13 @@
i += 2;
} while (i < h);
- *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0);
- *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32(
- vaddq_s32(sse_lo_s32, sse_hi_s32))),
- 0);
+ *sum = horizontal_add_int16x8(sum_s16);
+ *sse = horizontal_add_uint32x4(
+ vreinterpretq_u32_s32(vaddq_s32(sse_lo_s32, sse_hi_s32)));
}
+#endif
+
void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride,
const uint8_t *ref_ptr, int ref_stride,
unsigned int *sse, int *sum) {
@@ -264,117 +357,165 @@
return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12);
}
+#if defined(__ARM_FEATURE_DOTPROD) && (__ARM_FEATURE_DOTPROD == 1)
+
unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr, int ref_stride,
unsigned int *sse) {
int i;
- int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16;
- int64x1_t d0s64;
- uint8x16_t q0u8, q1u8, q2u8, q3u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
+ uint8x16_t a[2], b[2], abs_diff[2];
+ uint32x4_t sse_vec[2] = { vdupq_n_u32(0), vdupq_n_u32(0) };
- q7s32 = vdupq_n_s32(0);
- q8s32 = vdupq_n_s32(0);
- q9s32 = vdupq_n_s32(0);
- q10s32 = vdupq_n_s32(0);
-
- for (i = 0; i < 8; i++) { // mse16x16_neon_loop
- q0u8 = vld1q_u8(src_ptr);
+ for (i = 0; i < 8; i++) {
+ a[0] = vld1q_u8(src_ptr);
src_ptr += src_stride;
- q1u8 = vld1q_u8(src_ptr);
+ a[1] = vld1q_u8(src_ptr);
src_ptr += src_stride;
- q2u8 = vld1q_u8(ref_ptr);
+ b[0] = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
- q3u8 = vld1q_u8(ref_ptr);
+ b[1] = vld1q_u8(ref_ptr);
ref_ptr += ref_stride;
- q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8));
- q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8));
- q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8));
- q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8));
+ abs_diff[0] = vabdq_u8(a[0], b[0]);
+ abs_diff[1] = vabdq_u8(a[1], b[1]);
- d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16));
- d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16));
- q7s32 = vmlal_s16(q7s32, d22s16, d22s16);
- q8s32 = vmlal_s16(q8s32, d23s16, d23s16);
+ sse_vec[0] = vdotq_u32(sse_vec[0], abs_diff[0], abs_diff[0]);
+ sse_vec[1] = vdotq_u32(sse_vec[1], abs_diff[1], abs_diff[1]);
+ }
- d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16));
- d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16));
- q9s32 = vmlal_s16(q9s32, d24s16, d24s16);
- q10s32 = vmlal_s16(q10s32, d25s16, d25s16);
+ *sse = horizontal_add_uint32x4(vaddq_u32(sse_vec[0], sse_vec[1]));
+ return horizontal_add_uint32x4(vaddq_u32(sse_vec[0], sse_vec[1]));
+}
- d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16));
- d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16));
- q7s32 = vmlal_s16(q7s32, d26s16, d26s16);
- q8s32 = vmlal_s16(q8s32, d27s16, d27s16);
+unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr,
+ int ref_stride) {
+ uint8x8_t a[4], b[4], abs_diff[4];
+ uint32x2_t sse = vdup_n_u32(0);
+
+ a[0] = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ b[0] = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ a[1] = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ b[1] = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ a[2] = vld1_u8(src_ptr);
+ src_ptr += src_stride;
+ b[2] = vld1_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ a[3] = vld1_u8(src_ptr);
+ b[3] = vld1_u8(ref_ptr);
+
+ abs_diff[0] = vabd_u8(a[0], b[0]);
+ abs_diff[1] = vabd_u8(a[1], b[1]);
+ abs_diff[2] = vabd_u8(a[2], b[2]);
+ abs_diff[3] = vabd_u8(a[3], b[3]);
+
+ sse = vdot_u32(sse, abs_diff[0], abs_diff[0]);
+ sse = vdot_u32(sse, abs_diff[1], abs_diff[1]);
+ sse = vdot_u32(sse, abs_diff[2], abs_diff[2]);
+ sse = vdot_u32(sse, abs_diff[3], abs_diff[3]);
- d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16));
- d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16));
- q9s32 = vmlal_s16(q9s32, d28s16, d28s16);
- q10s32 = vmlal_s16(q10s32, d29s16, d29s16);
- }
+ return vget_lane_u32(sse, 0);
+}
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q10s32 = vaddq_s32(q7s32, q9s32);
+#else
- q1s64 = vpaddlq_s32(q10s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
+unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride,
+ const unsigned char *ref_ptr, int ref_stride,
+ unsigned int *sse) {
+ int i;
+ uint8x16_t a[2], b[2];
+ int16x4_t diff_lo[4], diff_hi[4];
+ uint16x8_t diff[4];
+ int32x4_t sse_vec[4] = { vdupq_n_s32(0), vdupq_n_s32(0), vdupq_n_s32(0),
+ vdupq_n_s32(0) };
- vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0);
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
+ for (i = 0; i < 8; i++) {
+ a[0] = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ a[1] = vld1q_u8(src_ptr);
+ src_ptr += src_stride;
+ b[0] = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+ b[1] = vld1q_u8(ref_ptr);
+ ref_ptr += ref_stride;
+
+ diff[0] = vsubl_u8(vget_low_u8(a[0]), vget_low_u8(b[0]));
+ diff[1] = vsubl_u8(vget_high_u8(a[0]), vget_high_u8(b[0]));
+ diff[2] = vsubl_u8(vget_low_u8(a[1]), vget_low_u8(b[1]));
+ diff[3] = vsubl_u8(vget_high_u8(a[1]), vget_high_u8(b[1]));
+
+ diff_lo[0] = vreinterpret_s16_u16(vget_low_u16(diff[0]));
+ diff_lo[1] = vreinterpret_s16_u16(vget_low_u16(diff[1]));
+ sse_vec[0] = vmlal_s16(sse_vec[0], diff_lo[0], diff_lo[0]);
+ sse_vec[1] = vmlal_s16(sse_vec[1], diff_lo[1], diff_lo[1]);
+
+ diff_lo[2] = vreinterpret_s16_u16(vget_low_u16(diff[2]));
+ diff_lo[3] = vreinterpret_s16_u16(vget_low_u16(diff[3]));
+ sse_vec[2] = vmlal_s16(sse_vec[2], diff_lo[2], diff_lo[2]);
+ sse_vec[3] = vmlal_s16(sse_vec[3], diff_lo[3], diff_lo[3]);
+
+ diff_hi[0] = vreinterpret_s16_u16(vget_high_u16(diff[0]));
+ diff_hi[1] = vreinterpret_s16_u16(vget_high_u16(diff[1]));
+ sse_vec[0] = vmlal_s16(sse_vec[0], diff_hi[0], diff_hi[0]);
+ sse_vec[1] = vmlal_s16(sse_vec[1], diff_hi[1], diff_hi[1]);
+
+ diff_hi[2] = vreinterpret_s16_u16(vget_high_u16(diff[2]));
+ diff_hi[3] = vreinterpret_s16_u16(vget_high_u16(diff[3]));
+ sse_vec[2] = vmlal_s16(sse_vec[2], diff_hi[2], diff_hi[2]);
+ sse_vec[3] = vmlal_s16(sse_vec[3], diff_hi[3], diff_hi[3]);
+ }
+
+ sse_vec[0] = vaddq_s32(sse_vec[0], sse_vec[1]);
+ sse_vec[2] = vaddq_s32(sse_vec[2], sse_vec[3]);
+ sse_vec[0] = vaddq_s32(sse_vec[0], sse_vec[2]);
+
+ *sse = horizontal_add_uint32x4(vreinterpretq_u32_s32(sse_vec[0]));
+ return horizontal_add_uint32x4(vreinterpretq_u32_s32(sse_vec[0]));
}
unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride,
const unsigned char *ref_ptr,
int ref_stride) {
- int16x4_t d22s16, d24s16, d26s16, d28s16;
- int64x1_t d0s64;
- uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
- int32x4_t q7s32, q8s32, q9s32, q10s32;
- uint16x8_t q11u16, q12u16, q13u16, q14u16;
- int64x2_t q1s64;
+ uint8x8_t a[4], b[4];
+ int16x4_t diff_lo[4];
+ uint16x8_t diff[4];
+ int32x4_t sse;
- d0u8 = vld1_u8(src_ptr);
+ a[0] = vld1_u8(src_ptr);
src_ptr += src_stride;
- d4u8 = vld1_u8(ref_ptr);
+ b[0] = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
- d1u8 = vld1_u8(src_ptr);
+ a[1] = vld1_u8(src_ptr);
src_ptr += src_stride;
- d5u8 = vld1_u8(ref_ptr);
+ b[1] = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
- d2u8 = vld1_u8(src_ptr);
+ a[2] = vld1_u8(src_ptr);
src_ptr += src_stride;
- d6u8 = vld1_u8(ref_ptr);
+ b[2] = vld1_u8(ref_ptr);
ref_ptr += ref_stride;
- d3u8 = vld1_u8(src_ptr);
- src_ptr += src_stride;
- d7u8 = vld1_u8(ref_ptr);
- ref_ptr += ref_stride;
+ a[3] = vld1_u8(src_ptr);
+ b[3] = vld1_u8(ref_ptr);
- q11u16 = vsubl_u8(d0u8, d4u8);
- q12u16 = vsubl_u8(d1u8, d5u8);
- q13u16 = vsubl_u8(d2u8, d6u8);
- q14u16 = vsubl_u8(d3u8, d7u8);
+ diff[0] = vsubl_u8(a[0], b[0]);
+ diff[1] = vsubl_u8(a[1], b[1]);
+ diff[2] = vsubl_u8(a[2], b[2]);
+ diff[3] = vsubl_u8(a[3], b[3]);
- d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
- d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
- d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
- d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));
+ diff_lo[0] = vget_low_s16(vreinterpretq_s16_u16(diff[0]));
+ diff_lo[1] = vget_low_s16(vreinterpretq_s16_u16(diff[1]));
+ diff_lo[2] = vget_low_s16(vreinterpretq_s16_u16(diff[2]));
+ diff_lo[3] = vget_low_s16(vreinterpretq_s16_u16(diff[3]));
- q7s32 = vmull_s16(d22s16, d22s16);
- q8s32 = vmull_s16(d24s16, d24s16);
- q9s32 = vmull_s16(d26s16, d26s16);
- q10s32 = vmull_s16(d28s16, d28s16);
+ sse = vmull_s16(diff_lo[0], diff_lo[0]);
+ sse = vmlal_s16(sse, diff_lo[1], diff_lo[1]);
+ sse = vmlal_s16(sse, diff_lo[2], diff_lo[2]);
+ sse = vmlal_s16(sse, diff_lo[3], diff_lo[3]);
- q7s32 = vaddq_s32(q7s32, q8s32);
- q9s32 = vaddq_s32(q9s32, q10s32);
- q9s32 = vaddq_s32(q7s32, q9s32);
-
- q1s64 = vpaddlq_s32(q9s32);
- d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));
-
- return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
+ return horizontal_add_uint32x4(vreinterpretq_u32_s32(sse));
}
+
+#endif
--- a/vpx_dsp/arm/vpx_convolve8_neon.c
+++ b/vpx_dsp/arm/vpx_convolve8_neon.c
@@ -14,6 +14,7 @@
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
+#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/arm/vpx_convolve8_neon.h"
#include "vpx_ports/mem.h"
@@ -30,6 +31,741 @@
// instructions. This optimization is much faster in speed unit test, but slowed
// down the whole decoder by 5%.
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \
+ (__ARM_FEATURE_DOTPROD == 1)
+DECLARE_ALIGNED(16, static const uint8_t, dot_prod_permute_tbl[48]) = {
+ 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6,
+ 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10,
+ 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14
+};
+
+DECLARE_ALIGNED(16, static const uint8_t, dot_prod_tran_concat_tbl[32]) = {
+ 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27,
+ 4, 12, 20, 28, 5, 13, 21, 29, 6, 14, 22, 30, 7, 15, 23, 31
+};
+
+DECLARE_ALIGNED(16, static const uint8_t, dot_prod_merge_block_tbl[48]) = {
+ /* Shift left and insert new last column in transposed 4x4 block. */
+ 1, 2, 3, 16, 5, 6, 7, 20, 9, 10, 11, 24, 13, 14, 15, 28,
+ /* Shift left and insert two new columns in transposed 4x4 block. */
+ 2, 3, 16, 17, 6, 7, 20, 21, 10, 11, 24, 25, 14, 15, 28, 29,
+ /* Shift left and insert three new columns in transposed 4x4 block. */
+ 3, 16, 17, 18, 7, 20, 21, 22, 11, 24, 25, 26, 15, 28, 29, 30
+};
+
+static INLINE void transpose_concat_4x4(int8x8_t *a0, int8x8_t *a1,
+ int8x8_t *a2, int8x8_t *a3,
+ int8x16_t *b,
+ const uint8x16_t permute_tbl) {
+ /* Transpose 8-bit elements and concatenate result rows as follows:
+ * a0: 00, 01, 02, 03, XX, XX, XX, XX
+ * a1: 10, 11, 12, 13, XX, XX, XX, XX
+ * a2: 20, 21, 22, 23, XX, XX, XX, XX
+ * a3: 30, 31, 32, 33, XX, XX, XX, XX
+ *
+ * b: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
+ *
+ * The 'permute_tbl' is always 'dot_prod_tran_concat_tbl' above. Passing it
+ * as an argument is preferable to loading it directly from memory as this
+ * inline helper is called many times from the same parent function.
+ */
+
+ int8x16x2_t samples = { { vcombine_s8(*a0, *a1), vcombine_s8(*a2, *a3) } };
+ *b = vqtbl2q_s8(samples, permute_tbl);
+}
+
+static INLINE void transpose_concat_8x4(int8x8_t *a0, int8x8_t *a1,
+ int8x8_t *a2, int8x8_t *a3,
+ int8x16_t *b0, int8x16_t *b1,
+ const uint8x16x2_t permute_tbl) {
+ /* Transpose 8-bit elements and concatenate result rows as follows:
+ * a0: 00, 01, 02, 03, 04, 05, 06, 07
+ * a1: 10, 11, 12, 13, 14, 15, 16, 17
+ * a2: 20, 21, 22, 23, 24, 25, 26, 27
+ * a3: 30, 31, 32, 33, 34, 35, 36, 37
+ *
+ * b0: 00, 10, 20, 30, 01, 11, 21, 31, 02, 12, 22, 32, 03, 13, 23, 33
+ * b1: 04, 14, 24, 34, 05, 15, 25, 35, 06, 16, 26, 36, 07, 17, 27, 37
+ *
+ * The 'permute_tbl' is always 'dot_prod_tran_concat_tbl' above. Passing it
+ * as an argument is preferable to loading it directly from memory as this
+ * inline helper is called many times from the same parent function.
+ */
+
+ int8x16x2_t samples = { { vcombine_s8(*a0, *a1), vcombine_s8(*a2, *a3) } };
+ *b0 = vqtbl2q_s8(samples, permute_tbl.val[0]);
+ *b1 = vqtbl2q_s8(samples, permute_tbl.val[1]);
+}
+
+void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+ int h) {
+ const int8x8_t filters = vmovn_s16(vld1q_s16(filter[x0_q4]));
+ const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[x0_q4]), 128);
+ const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));
+ const uint8x16_t range_limit = vdupq_n_u8(128);
+ uint8x16_t s0, s1, s2, s3;
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+ assert(x_step_q4 == 16);
+
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
+
+ src -= 3;
+
+ if (w == 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ do {
+ int32x4_t t0, t1, t2, t3;
+ int16x8_t t01, t23;
+ uint8x8_t d01, d23;
+
+ s0 = vld1q_u8(src);
+ src += src_stride;
+ s1 = vld1q_u8(src);
+ src += src_stride;
+ s2 = vld1q_u8(src);
+ src += src_stride;
+ s3 = vld1q_u8(src);
+ src += src_stride;
+
+ t0 = convolve8_4_dot(s0, filters, correction, range_limit, permute_tbl);
+ t1 = convolve8_4_dot(s1, filters, correction, range_limit, permute_tbl);
+ t2 = convolve8_4_dot(s2, filters, correction, range_limit, permute_tbl);
+ t3 = convolve8_4_dot(s3, filters, correction, range_limit, permute_tbl);
+
+ t01 = vcombine_s16(vqmovn_s32(t0), vqmovn_s32(t1));
+ t23 = vcombine_s16(vqmovn_s32(t2), vqmovn_s32(t3));
+ d01 = vqrshrun_n_s16(t01, 7);
+ d23 = vqrshrun_n_s16(t23, 7);
+
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);
+ dst += dst_stride;
+ h -= 4;
+ } while (h > 0);
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ const uint8_t *s;
+ uint8_t *d;
+ int width;
+ uint8x8_t d0, d1, d2, d3;
+
+ do {
+ width = w;
+ s = src;
+ d = dst;
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_dot(s0, filters, correction, range_limit, permute_tbl);
+ d1 = convolve8_8_dot(s1, filters, correction, range_limit, permute_tbl);
+ d2 = convolve8_8_dot(s2, filters, correction, range_limit, permute_tbl);
+ d3 = convolve8_8_dot(s3, filters, correction, range_limit, permute_tbl);
+
+ vst1_u8(d + 0 * dst_stride, d0);
+ vst1_u8(d + 1 * dst_stride, d1);
+ vst1_u8(d + 2 * dst_stride, d2);
+ vst1_u8(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ h -= 4;
+ } while (h > 0);
+ }
+}
+
+void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4,
+ int w, int h) {
+ const int8x8_t filters = vmovn_s16(vld1q_s16(filter[x0_q4]));
+ const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[x0_q4]), 128);
+ const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));
+ const uint8x16_t range_limit = vdupq_n_u8(128);
+ uint8x16_t s0, s1, s2, s3;
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+ assert(x_step_q4 == 16);
+
+ (void)x_step_q4;
+ (void)y0_q4;
+ (void)y_step_q4;
+
+ src -= 3;
+
+ if (w == 4) {
+ const uint8x16x2_t permute_tbl = vld1q_u8_x2(dot_prod_permute_tbl);
+ do {
+ int32x4_t t0, t1, t2, t3;
+ int16x8_t t01, t23;
+ uint8x8_t d01, d23, dd01, dd23;
+ dd01 = vdup_n_u8(0);
+ dd23 = vdup_n_u8(0);
+
+ s0 = vld1q_u8(src);
+ src += src_stride;
+ s1 = vld1q_u8(src);
+ src += src_stride;
+ s2 = vld1q_u8(src);
+ src += src_stride;
+ s3 = vld1q_u8(src);
+ src += src_stride;
+
+ t0 = convolve8_4_dot(s0, filters, correction, range_limit, permute_tbl);
+ t1 = convolve8_4_dot(s1, filters, correction, range_limit, permute_tbl);
+ t2 = convolve8_4_dot(s2, filters, correction, range_limit, permute_tbl);
+ t3 = convolve8_4_dot(s3, filters, correction, range_limit, permute_tbl);
+
+ t01 = vcombine_s16(vqmovn_s32(t0), vqmovn_s32(t1));
+ t23 = vcombine_s16(vqmovn_s32(t2), vqmovn_s32(t3));
+ d01 = vqrshrun_n_s16(t01, 7);
+ d23 = vqrshrun_n_s16(t23, 7);
+
+ dd01 = load_u8(dst + 0 * dst_stride, dst_stride);
+ dd23 = load_u8(dst + 2 * dst_stride, dst_stride);
+ d01 = vrhadd_u8(d01, dd01);
+ d23 = vrhadd_u8(d23, dd23);
+
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);
+ dst += dst_stride;
+ h -= 4;
+ } while (h > 0);
+ } else {
+ const uint8x16x3_t permute_tbl = vld1q_u8_x3(dot_prod_permute_tbl);
+ const uint8_t *s;
+ uint8_t *d;
+ int width;
+ uint8x8_t d0, d1, d2, d3, dd0, dd1, dd2, dd3;
+
+ do {
+ width = w;
+ s = src;
+ d = dst;
+ do {
+ s0 = vld1q_u8(s + 0 * src_stride);
+ s1 = vld1q_u8(s + 1 * src_stride);
+ s2 = vld1q_u8(s + 2 * src_stride);
+ s3 = vld1q_u8(s + 3 * src_stride);
+
+ d0 = convolve8_8_dot(s0, filters, correction, range_limit, permute_tbl);
+ d1 = convolve8_8_dot(s1, filters, correction, range_limit, permute_tbl);
+ d2 = convolve8_8_dot(s2, filters, correction, range_limit, permute_tbl);
+ d3 = convolve8_8_dot(s3, filters, correction, range_limit, permute_tbl);
+
+ dd0 = vld1_u8(d + 0 * dst_stride);
+ dd1 = vld1_u8(d + 1 * dst_stride);
+ dd2 = vld1_u8(d + 2 * dst_stride);
+ dd3 = vld1_u8(d + 3 * dst_stride);
+ d0 = vrhadd_u8(d0, dd0);
+ d1 = vrhadd_u8(d1, dd1);
+ d2 = vrhadd_u8(d2, dd2);
+ d3 = vrhadd_u8(d3, dd3);
+
+ vst1_u8(d + 0 * dst_stride, d0);
+ vst1_u8(d + 1 * dst_stride, d1);
+ vst1_u8(d + 2 * dst_stride, d2);
+ vst1_u8(d + 3 * dst_stride, d3);
+
+ s += 8;
+ d += 8;
+ width -= 8;
+ } while (width > 0);
+ src += 4 * src_stride;
+ dst += 4 * dst_stride;
+ h -= 4;
+ } while (h > 0);
+ }
+}
+
+void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+ int h) {
+ const int8x8_t filters = vmovn_s16(vld1q_s16(filter[y0_q4]));
+ const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[y0_q4]), 128);
+ const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));
+ const uint8x8_t range_limit = vdup_n_u8(128);
+ const uint8x16x3_t merge_block_tbl = vld1q_u8_x3(dot_prod_merge_block_tbl);
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6;
+ int8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int8x16x2_t samples_LUT;
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+ assert(y_step_q4 == 16);
+
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y_step_q4;
+
+ src -= 3 * src_stride;
+
+ if (w == 4) {
+ const uint8x16_t tran_concat_tbl = vld1q_u8(dot_prod_tran_concat_tbl);
+ int8x16_t s0123, s1234, s2345, s3456, s4567, s5678, s6789, s78910;
+ int32x4_t d0, d1, d2, d3;
+ uint8x8_t d01, d23;
+
+ load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);
+ src += 4 * src_stride;
+ t4 = vld1_u8(src);
+ src += src_stride;
+ t5 = vld1_u8(src);
+ src += src_stride;
+ t6 = vld1_u8(src);
+ src += src_stride;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));
+ s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));
+ s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));
+ s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));
+ s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));
+ s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));
+ s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));
+ s7 = vdup_n_s8(0);
+ s8 = vdup_n_s8(0);
+ s9 = vdup_n_s8(0);
+
+ /* This operation combines a conventional transpose and the sample permute
+ * (see horizontal case) required before computing the dot product.
+ */
+ transpose_concat_4x4(&s0, &s1, &s2, &s3, &s0123, tran_concat_tbl);
+ transpose_concat_4x4(&s1, &s2, &s3, &s4, &s1234, tran_concat_tbl);
+ transpose_concat_4x4(&s2, &s3, &s4, &s5, &s2345, tran_concat_tbl);
+ transpose_concat_4x4(&s3, &s4, &s5, &s6, &s3456, tran_concat_tbl);
+ transpose_concat_4x4(&s4, &s5, &s6, &s7, &s4567, tran_concat_tbl);
+ transpose_concat_4x4(&s5, &s6, &s7, &s8, &s5678, tran_concat_tbl);
+ transpose_concat_4x4(&s6, &s7, &s8, &s9, &s6789, tran_concat_tbl);
+
+ do {
+ uint8x8_t t7, t8, t9, t10;
+
+ load_u8_8x4(src, src_stride, &t7, &t8, &t9, &t10);
+
+ s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));
+ s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));
+ s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));
+ s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));
+
+ transpose_concat_4x4(&s7, &s8, &s9, &s10, &s78910, tran_concat_tbl);
+
+ /* Merge new data into block from previous iteration. */
+ samples_LUT.val[0] = s3456;
+ samples_LUT.val[1] = s78910;
+ s4567 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);
+ s5678 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);
+ s6789 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);
+
+ d0 = convolve8_4_dot_partial(s0123, s4567, correction, filters);
+ d1 = convolve8_4_dot_partial(s1234, s5678, correction, filters);
+ d2 = convolve8_4_dot_partial(s2345, s6789, correction, filters);
+ d3 = convolve8_4_dot_partial(s3456, s78910, correction, filters);
+
+ d01 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d0), vqmovn_s32(d1)), 7);
+ d23 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d2), vqmovn_s32(d3)), 7);
+
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);
+ dst += dst_stride;
+
+ /* Prepare block for next iteration - re-using as much as possible. */
+ /* Shuffle everything up four rows. */
+ s0123 = s4567;
+ s1234 = s5678;
+ s2345 = s6789;
+ s3456 = s78910;
+
+ src += 4 * src_stride;
+ h -= 4;
+ } while (h > 0);
+ } else {
+ const uint8x16x2_t tran_concat_tbl = vld1q_u8_x2(dot_prod_tran_concat_tbl);
+ int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
+ s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo,
+ s6789_hi, s78910_lo, s78910_hi;
+ uint8x8_t d0, d1, d2, d3;
+ const uint8_t *s;
+ uint8_t *d;
+ int height;
+
+ do {
+ height = h;
+ s = src;
+ d = dst;
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ s += 4 * src_stride;
+ t4 = vld1_u8(s);
+ s += src_stride;
+ t5 = vld1_u8(s);
+ s += src_stride;
+ t6 = vld1_u8(s);
+ s += src_stride;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));
+ s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));
+ s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));
+ s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));
+ s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));
+ s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));
+ s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));
+ s7 = vdup_n_s8(0);
+ s8 = vdup_n_s8(0);
+ s9 = vdup_n_s8(0);
+
+ /* This operation combines a conventional transpose and the sample permute
+ * (see horizontal case) required before computing the dot product.
+ */
+ transpose_concat_8x4(&s0, &s1, &s2, &s3, &s0123_lo, &s0123_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s1, &s2, &s3, &s4, &s1234_lo, &s1234_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s2, &s3, &s4, &s5, &s2345_lo, &s2345_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s3, &s4, &s5, &s6, &s3456_lo, &s3456_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s4, &s5, &s6, &s7, &s4567_lo, &s4567_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s5, &s6, &s7, &s8, &s5678_lo, &s5678_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s6, &s7, &s8, &s9, &s6789_lo, &s6789_hi,
+ tran_concat_tbl);
+
+ do {
+ uint8x8_t t7, t8, t9, t10;
+
+ load_u8_8x4(s, src_stride, &t7, &t8, &t9, &t10);
+
+ s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));
+ s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));
+ s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));
+ s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));
+
+ transpose_concat_8x4(&s7, &s8, &s9, &s10, &s78910_lo, &s78910_hi,
+ tran_concat_tbl);
+
+ /* Merge new data into block from previous iteration. */
+ samples_LUT.val[0] = s3456_lo;
+ samples_LUT.val[1] = s78910_lo;
+ s4567_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);
+ s5678_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);
+ s6789_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);
+
+ samples_LUT.val[0] = s3456_hi;
+ samples_LUT.val[1] = s78910_hi;
+ s4567_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);
+ s5678_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);
+ s6789_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);
+
+ d0 = convolve8_8_dot_partial(s0123_lo, s4567_lo, s0123_hi, s4567_hi,
+ correction, filters);
+ d1 = convolve8_8_dot_partial(s1234_lo, s5678_lo, s1234_hi, s5678_hi,
+ correction, filters);
+ d2 = convolve8_8_dot_partial(s2345_lo, s6789_lo, s2345_hi, s6789_hi,
+ correction, filters);
+ d3 = convolve8_8_dot_partial(s3456_lo, s78910_lo, s3456_hi, s78910_hi,
+ correction, filters);
+ vst1_u8(d + 0 * dst_stride, d0);
+ vst1_u8(d + 1 * dst_stride, d1);
+ vst1_u8(d + 2 * dst_stride, d2);
+ vst1_u8(d + 3 * dst_stride, d3);
+
+ /* Prepare block for next iteration - re-using as much as possible. */
+ /* Shuffle everything up four rows. */
+ s0123_lo = s4567_lo;
+ s0123_hi = s4567_hi;
+ s1234_lo = s5678_lo;
+ s1234_hi = s5678_hi;
+ s2345_lo = s6789_lo;
+ s2345_hi = s6789_hi;
+ s3456_lo = s78910_lo;
+ s3456_hi = s78910_hi;
+
+ s += 4 * src_stride;
+ d += 4 * dst_stride;
+ height -= 4;
+ } while (height > 0);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+}
+
+void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride,
+ uint8_t *dst, ptrdiff_t dst_stride,
+ const InterpKernel *filter, int x0_q4,
+ int x_step_q4, int y0_q4, int y_step_q4, int w,
+ int h) {
+ const int8x8_t filters = vmovn_s16(vld1q_s16(filter[y0_q4]));
+ const int16x8_t correct_tmp = vmulq_n_s16(vld1q_s16(filter[y0_q4]), 128);
+ const int32x4_t correction = vdupq_n_s32((int32_t)vaddvq_s16(correct_tmp));
+ const uint8x8_t range_limit = vdup_n_u8(128);
+ const uint8x16x3_t merge_block_tbl = vld1q_u8_x3(dot_prod_merge_block_tbl);
+ uint8x8_t t0, t1, t2, t3, t4, t5, t6;
+ int8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10;
+ int8x16x2_t samples_LUT;
+
+ assert(!((intptr_t)dst & 3));
+ assert(!(dst_stride & 3));
+ assert(y_step_q4 == 16);
+
+ (void)x0_q4;
+ (void)x_step_q4;
+ (void)y_step_q4;
+
+ src -= 3 * src_stride;
+
+ if (w == 4) {
+ const uint8x16_t tran_concat_tbl = vld1q_u8(dot_prod_tran_concat_tbl);
+ int8x16_t s0123, s1234, s2345, s3456, s4567, s5678, s6789, s78910;
+ int32x4_t d0, d1, d2, d3;
+ uint8x8_t d01, d23, dd01, dd23;
+
+ load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3);
+ src += 4 * src_stride;
+ t4 = vld1_u8(src);
+ src += src_stride;
+ t5 = vld1_u8(src);
+ src += src_stride;
+ t6 = vld1_u8(src);
+ src += src_stride;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));
+ s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));
+ s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));
+ s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));
+ s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));
+ s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));
+ s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));
+ s7 = vdup_n_s8(0);
+ s8 = vdup_n_s8(0);
+ s9 = vdup_n_s8(0);
+
+ /* This operation combines a conventional transpose and the sample permute
+ * (see horizontal case) required before computing the dot product.
+ */
+ transpose_concat_4x4(&s0, &s1, &s2, &s3, &s0123, tran_concat_tbl);
+ transpose_concat_4x4(&s1, &s2, &s3, &s4, &s1234, tran_concat_tbl);
+ transpose_concat_4x4(&s2, &s3, &s4, &s5, &s2345, tran_concat_tbl);
+ transpose_concat_4x4(&s3, &s4, &s5, &s6, &s3456, tran_concat_tbl);
+ transpose_concat_4x4(&s4, &s5, &s6, &s7, &s4567, tran_concat_tbl);
+ transpose_concat_4x4(&s5, &s6, &s7, &s8, &s5678, tran_concat_tbl);
+ transpose_concat_4x4(&s6, &s7, &s8, &s9, &s6789, tran_concat_tbl);
+
+ do {
+ uint8x8_t t7, t8, t9, t10;
+
+ load_u8_8x4(src, src_stride, &t7, &t8, &t9, &t10);
+
+ s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));
+ s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));
+ s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));
+ s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));
+
+ transpose_concat_4x4(&s7, &s8, &s9, &s10, &s78910, tran_concat_tbl);
+
+ /* Merge new data into block from previous iteration. */
+ samples_LUT.val[0] = s3456;
+ samples_LUT.val[1] = s78910;
+ s4567 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);
+ s5678 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);
+ s6789 = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);
+
+ d0 = convolve8_4_dot_partial(s0123, s4567, correction, filters);
+ d1 = convolve8_4_dot_partial(s1234, s5678, correction, filters);
+ d2 = convolve8_4_dot_partial(s2345, s6789, correction, filters);
+ d3 = convolve8_4_dot_partial(s3456, s78910, correction, filters);
+
+ d01 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d0), vqmovn_s32(d1)), 7);
+ d23 = vqrshrun_n_s16(vcombine_s16(vqmovn_s32(d2), vqmovn_s32(d3)), 7);
+
+ dd01 = load_u8(dst + 0 * dst_stride, dst_stride);
+ dd23 = load_u8(dst + 2 * dst_stride, dst_stride);
+ d01 = vrhadd_u8(d01, dd01);
+ d23 = vrhadd_u8(d23, dd23);
+
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0);
+ dst += dst_stride;
+ vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1);
+ dst += dst_stride;
+
+ /* Prepare block for next iteration - re-using as much as possible. */
+ /* Shuffle everything up four rows. */
+ s0123 = s4567;
+ s1234 = s5678;
+ s2345 = s6789;
+ s3456 = s78910;
+
+ src += 4 * src_stride;
+ h -= 4;
+ } while (h > 0);
+ } else {
+ const uint8x16x2_t tran_concat_tbl = vld1q_u8_x2(dot_prod_tran_concat_tbl);
+ int8x16_t s0123_lo, s0123_hi, s1234_lo, s1234_hi, s2345_lo, s2345_hi,
+ s3456_lo, s3456_hi, s4567_lo, s4567_hi, s5678_lo, s5678_hi, s6789_lo,
+ s6789_hi, s78910_lo, s78910_hi;
+ uint8x8_t d0, d1, d2, d3, dd0, dd1, dd2, dd3;
+ const uint8_t *s;
+ uint8_t *d;
+ int height;
+
+ do {
+ height = h;
+ s = src;
+ d = dst;
+
+ load_u8_8x4(s, src_stride, &t0, &t1, &t2, &t3);
+ s += 4 * src_stride;
+ t4 = vld1_u8(s);
+ s += src_stride;
+ t5 = vld1_u8(s);
+ s += src_stride;
+ t6 = vld1_u8(s);
+ s += src_stride;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ s0 = vreinterpret_s8_u8(vsub_u8(t0, range_limit));
+ s1 = vreinterpret_s8_u8(vsub_u8(t1, range_limit));
+ s2 = vreinterpret_s8_u8(vsub_u8(t2, range_limit));
+ s3 = vreinterpret_s8_u8(vsub_u8(t3, range_limit));
+ s4 = vreinterpret_s8_u8(vsub_u8(t4, range_limit));
+ s5 = vreinterpret_s8_u8(vsub_u8(t5, range_limit));
+ s6 = vreinterpret_s8_u8(vsub_u8(t6, range_limit));
+ s7 = vdup_n_s8(0);
+ s8 = vdup_n_s8(0);
+ s9 = vdup_n_s8(0);
+
+ /* This operation combines a conventional transpose and the sample permute
+ * (see horizontal case) required before computing the dot product.
+ */
+ transpose_concat_8x4(&s0, &s1, &s2, &s3, &s0123_lo, &s0123_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s1, &s2, &s3, &s4, &s1234_lo, &s1234_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s2, &s3, &s4, &s5, &s2345_lo, &s2345_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s3, &s4, &s5, &s6, &s3456_lo, &s3456_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s4, &s5, &s6, &s7, &s4567_lo, &s4567_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s5, &s6, &s7, &s8, &s5678_lo, &s5678_hi,
+ tran_concat_tbl);
+ transpose_concat_8x4(&s6, &s7, &s8, &s9, &s6789_lo, &s6789_hi,
+ tran_concat_tbl);
+
+ do {
+ uint8x8_t t7, t8, t9, t10;
+
+ load_u8_8x4(s, src_stride, &t7, &t8, &t9, &t10);
+
+ s7 = vreinterpret_s8_u8(vsub_u8(t7, range_limit));
+ s8 = vreinterpret_s8_u8(vsub_u8(t8, range_limit));
+ s9 = vreinterpret_s8_u8(vsub_u8(t9, range_limit));
+ s10 = vreinterpret_s8_u8(vsub_u8(t10, range_limit));
+
+ transpose_concat_8x4(&s7, &s8, &s9, &s10, &s78910_lo, &s78910_hi,
+ tran_concat_tbl);
+
+ /* Merge new data into block from previous iteration. */
+ samples_LUT.val[0] = s3456_lo;
+ samples_LUT.val[1] = s78910_lo;
+ s4567_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);
+ s5678_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);
+ s6789_lo = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);
+
+ samples_LUT.val[0] = s3456_hi;
+ samples_LUT.val[1] = s78910_hi;
+ s4567_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[0]);
+ s5678_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[1]);
+ s6789_hi = vqtbl2q_s8(samples_LUT, merge_block_tbl.val[2]);
+
+ d0 = convolve8_8_dot_partial(s0123_lo, s4567_lo, s0123_hi, s4567_hi,
+ correction, filters);
+ d1 = convolve8_8_dot_partial(s1234_lo, s5678_lo, s1234_hi, s5678_hi,
+ correction, filters);
+ d2 = convolve8_8_dot_partial(s2345_lo, s6789_lo, s2345_hi, s6789_hi,
+ correction, filters);
+ d3 = convolve8_8_dot_partial(s3456_lo, s78910_lo, s3456_hi, s78910_hi,
+ correction, filters);
+
+ dd0 = vld1_u8(d + 0 * dst_stride);
+ dd1 = vld1_u8(d + 1 * dst_stride);
+ dd2 = vld1_u8(d + 2 * dst_stride);
+ dd3 = vld1_u8(d + 3 * dst_stride);
+ d0 = vrhadd_u8(d0, dd0);
+ d1 = vrhadd_u8(d1, dd1);
+ d2 = vrhadd_u8(d2, dd2);
+ d3 = vrhadd_u8(d3, dd3);
+
+ vst1_u8(d + 0 * dst_stride, d0);
+ vst1_u8(d + 1 * dst_stride, d1);
+ vst1_u8(d + 2 * dst_stride, d2);
+ vst1_u8(d + 3 * dst_stride, d3);
+
+ /* Prepare block for next iteration - re-using as much as possible. */
+ /* Shuffle everything up four rows. */
+ s0123_lo = s4567_lo;
+ s0123_hi = s4567_hi;
+ s1234_lo = s5678_lo;
+ s1234_hi = s5678_hi;
+ s2345_lo = s6789_lo;
+ s2345_hi = s6789_hi;
+ s3456_lo = s78910_lo;
+ s3456_hi = s78910_hi;
+
+ s += 4 * src_stride;
+ d += 4 * dst_stride;
+ height -= 4;
+ } while (height > 0);
+ src += 8;
+ dst += 8;
+ w -= 8;
+ } while (w > 0);
+ }
+}
+
+#else
+
static INLINE void store_u8_8x8(uint8_t *s, const ptrdiff_t p,
const uint8x8_t s0, const uint8x8_t s1,
const uint8x8_t s2, const uint8x8_t s3,
@@ -145,7 +881,7 @@
src += 4;
dst += 4;
w -= 4;
- } while (w > 0);
+ } while (w != 0);
} else {
const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);
const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);
@@ -296,7 +1032,7 @@
s += 8;
d += 8;
width -= 8;
- } while (width > 0);
+ } while (width != 0);
src += 8 * src_stride;
dst += 8 * dst_stride;
h -= 8;
@@ -402,7 +1138,7 @@
src += 4;
dst += 4;
w -= 4;
- } while (w > 0);
+ } while (w != 0);
} else {
const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);
const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);
@@ -586,7 +1322,7 @@
s += 8;
d += 8;
width -= 8;
- } while (width > 0);
+ } while (width != 0);
src += 8 * src_stride;
dst += 8 * dst_stride;
h -= 8;
@@ -679,7 +1415,7 @@
s5 = s9;
s6 = s10;
h -= 4;
- } while (h > 0);
+ } while (h != 0);
} else {
const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);
const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);
@@ -759,11 +1495,11 @@
s5 = s9;
s6 = s10;
height -= 4;
- } while (height > 0);
+ } while (height != 0);
src += 8;
dst += 8;
w -= 8;
- } while (w > 0);
+ } while (w != 0);
}
}
@@ -860,7 +1596,7 @@
s5 = s9;
s6 = s10;
h -= 4;
- } while (h > 0);
+ } while (h != 0);
} else {
const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3);
const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0);
@@ -950,10 +1686,12 @@
s5 = s9;
s6 = s10;
height -= 4;
- } while (height > 0);
+ } while (height != 0);
src += 8;
dst += 8;
w -= 8;
- } while (w > 0);
+ } while (w != 0);
}
}
+
+#endif
--- a/vpx_dsp/arm/vpx_convolve8_neon.h
+++ b/vpx_dsp/arm/vpx_convolve8_neon.h
@@ -72,6 +72,107 @@
*s7 = vld1q_u8(s);
}
+#if defined(__aarch64__) && defined(__ARM_FEATURE_DOTPROD) && \
+ (__ARM_FEATURE_DOTPROD == 1)
+
+static INLINE int32x4_t convolve8_4_dot_partial(const int8x16_t samples_lo,
+ const int8x16_t samples_hi,
+ const int32x4_t correction,
+ const int8x8_t filters) {
+ /* Sample range-clamping and permutation are performed by the caller. */
+ int32x4_t sum;
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ sum = vdotq_lane_s32(correction, samples_lo, filters, 0);
+ sum = vdotq_lane_s32(sum, samples_hi, filters, 1);
+
+ /* Narrowing and packing is performed by the caller. */
+ return sum;
+}
+
+static INLINE int32x4_t convolve8_4_dot(uint8x16_t samples,
+ const int8x8_t filters,
+ const int32x4_t correction,
+ const uint8x16_t range_limit,
+ const uint8x16x2_t permute_tbl) {
+ int8x16_t clamped_samples, permuted_samples[2];
+ int32x4_t sum;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit));
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]);
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ sum = vdotq_lane_s32(correction, permuted_samples[0], filters, 0);
+ sum = vdotq_lane_s32(sum, permuted_samples[1], filters, 1);
+
+ /* Narrowing and packing is performed by the caller. */
+ return sum;
+}
+
+static INLINE uint8x8_t convolve8_8_dot_partial(const int8x16_t samples0_lo,
+ const int8x16_t samples0_hi,
+ const int8x16_t samples1_lo,
+ const int8x16_t samples1_hi,
+ const int32x4_t correction,
+ const int8x8_t filters) {
+ /* Sample range-clamping and permutation are performed by the caller. */
+ int32x4_t sum0, sum1;
+ int16x8_t sum;
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ /* First 4 output values. */
+ sum0 = vdotq_lane_s32(correction, samples0_lo, filters, 0);
+ sum0 = vdotq_lane_s32(sum0, samples0_hi, filters, 1);
+ /* Second 4 output values. */
+ sum1 = vdotq_lane_s32(correction, samples1_lo, filters, 0);
+ sum1 = vdotq_lane_s32(sum1, samples1_hi, filters, 1);
+
+ /* Narrow and re-pack. */
+ sum = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1));
+ return vqrshrun_n_s16(sum, 7);
+}
+
+static INLINE uint8x8_t convolve8_8_dot(uint8x16_t samples,
+ const int8x8_t filters,
+ const int32x4_t correction,
+ const uint8x16_t range_limit,
+ const uint8x16x3_t permute_tbl) {
+ int8x16_t clamped_samples, permuted_samples[3];
+ int32x4_t sum0, sum1;
+ int16x8_t sum;
+
+ /* Clamp sample range to [-128, 127] for 8-bit signed dot product. */
+ clamped_samples = vreinterpretq_s8_u8(vsubq_u8(samples, range_limit));
+
+ /* Permute samples ready for dot product. */
+ /* { 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6 } */
+ permuted_samples[0] = vqtbl1q_s8(clamped_samples, permute_tbl.val[0]);
+ /* { 4, 5, 6, 7, 5, 6, 7, 8, 6, 7, 8, 9, 7, 8, 9, 10 } */
+ permuted_samples[1] = vqtbl1q_s8(clamped_samples, permute_tbl.val[1]);
+ /* { 8, 9, 10, 11, 9, 10, 11, 12, 10, 11, 12, 13, 11, 12, 13, 14 } */
+ permuted_samples[2] = vqtbl1q_s8(clamped_samples, permute_tbl.val[2]);
+
+ /* Accumulate dot product into 'correction' to account for range clamp. */
+ /* First 4 output values. */
+ sum0 = vdotq_lane_s32(correction, permuted_samples[0], filters, 0);
+ sum0 = vdotq_lane_s32(sum0, permuted_samples[1], filters, 1);
+ /* Second 4 output values. */
+ sum1 = vdotq_lane_s32(correction, permuted_samples[1], filters, 0);
+ sum1 = vdotq_lane_s32(sum1, permuted_samples[2], filters, 1);
+
+ /* Narrow and re-pack. */
+ sum = vcombine_s16(vqmovn_s32(sum0), vqmovn_s32(sum1));
+ return vqrshrun_n_s16(sum, 7);
+}
+
+#endif
+
static INLINE int16x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1,
const int16x4_t s2, const int16x4_t s3,
const int16x4_t s4, const int16x4_t s5,
--- a/vpx_dsp/arm/vpx_convolve_avg_neon.c
+++ b/vpx_dsp/arm/vpx_convolve_avg_neon.c
@@ -43,7 +43,7 @@
vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(dd0), 1);
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else if (w == 8) { // avg8
uint8x8_t s0, s1, d0, d1;
uint8x16_t s01, d01;
@@ -64,7 +64,7 @@
vst1_u8(dst, vget_high_u8(d01));
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else if (w < 32) { // avg16
uint8x16_t s0, s1, d0, d1;
do {
@@ -83,7 +83,7 @@
vst1q_u8(dst, d1);
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else if (w == 32) { // avg32
uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3;
do {
@@ -110,7 +110,7 @@
vst1q_u8(dst + 16, d3);
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else { // avg64
uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3;
do {
--- a/vpx_dsp/arm/vpx_convolve_copy_neon.c
+++ b/vpx_dsp/arm/vpx_convolve_copy_neon.c
@@ -33,7 +33,7 @@
src += src_stride;
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else if (w == 8) { // copy8
uint8x8_t s0, s1;
do {
@@ -47,7 +47,7 @@
vst1_u8(dst, s1);
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else if (w < 32) { // copy16
uint8x16_t s0, s1;
do {
@@ -61,7 +61,7 @@
vst1q_u8(dst, s1);
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else if (w == 32) { // copy32
uint8x16_t s0, s1, s2, s3;
do {
@@ -79,7 +79,7 @@
vst1q_u8(dst + 16, s3);
dst += dst_stride;
h -= 2;
- } while (h > 0);
+ } while (h != 0);
} else { // copy64
uint8x16_t s0, s1, s2, s3;
do {
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -223,6 +223,8 @@
}
}
+ (void)reg_eax; // Avoid compiler warning on unused-but-set variable.
+
return flags & mask;
}
@@ -240,7 +242,7 @@
// x86_readtsc directly, but prevent the CPU's out-of-order execution from
// affecting the measurement (by having earlier/later instructions be evaluated
// in the time interval). See the white paper, "How to Benchmark Code
-// Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures" by
+// Execution Times on Intel(R) IA-32 and IA-64 Instruction Set Architectures" by
// Gabriele Paoloni for more information.
//
// If you are timing a large function (CPU time > a couple of seconds), use
@@ -306,7 +308,13 @@
static INLINE unsigned int x86_tsc_start(void) {
unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
+ // This call should not be removed. See function notes above.
cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ // Avoid compiler warnings on unused-but-set variables.
+ (void)reg_eax;
+ (void)reg_ebx;
+ (void)reg_ecx;
+ (void)reg_edx;
return x86_readtsc();
}
@@ -313,7 +321,13 @@
static INLINE unsigned int x86_tsc_end(void) {
uint32_t v = x86_readtscp();
unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx;
+ // This call should not be removed. See function notes above.
cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx);
+ // Avoid compiler warnings on unused-but-set variables.
+ (void)reg_eax;
+ (void)reg_ebx;
+ (void)reg_ecx;
+ (void)reg_edx;
return v;
}
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -64,6 +64,10 @@
if (!ybf->buffer_alloc) {
ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size);
+ if (!ybf->buffer_alloc) {
+ ybf->buffer_alloc_sz = 0;
+ return -1;
+ }
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
// This memset is needed for fixing the issue of using uninitialized
@@ -75,7 +79,7 @@
ybf->buffer_alloc_sz = frame_size;
}
- if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) return -1;
+ if (ybf->buffer_alloc_sz < frame_size) return -1;
/* Only support allocating buffers that have a border that's a multiple
* of 32. The border restriction is required to get 16-byte alignment of
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -114,10 +114,6 @@
ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
static const arg_def_t fpf_name =
ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
-#if CONFIG_FP_MB_STATS
-static const arg_def_t fpmbf_name =
- ARG_DEF(NULL, "fpmbf", 1, "First pass block statistics file name");
-#endif
static const arg_def_t limit =
ARG_DEF(NULL, "limit", 1, "Stop encoding after n input frames");
static const arg_def_t skip =
@@ -287,6 +283,64 @@
&buf_sz, &buf_initial_sz, &buf_optimal_sz, NULL
};
+#if CONFIG_VP9_ENCODER
+static const arg_def_t use_vizier_rc_params =
+ ARG_DEF(NULL, "use-vizier-rc-params", 1, "Use vizier rc params");
+static const arg_def_t active_wq_factor =
+ ARG_DEF(NULL, "active-wq-factor", 1, "Active worst quality factor");
+static const arg_def_t err_per_mb_factor =
+ ARG_DEF(NULL, "err-per-mb-factor", 1, "Error per macroblock factor");
+static const arg_def_t sr_default_decay_limit = ARG_DEF(
+ NULL, "sr-default-decay-limit", 1, "Second reference default decay limit");
+static const arg_def_t sr_diff_factor =
+ ARG_DEF(NULL, "sr-diff-factor", 1, "Second reference diff factor");
+static const arg_def_t kf_err_per_mb_factor = ARG_DEF(
+ NULL, "kf-err-per-mb-factor", 1, "Keyframe error per macroblock factor");
+static const arg_def_t kf_frame_min_boost_factor =
+ ARG_DEF(NULL, "kf-frame-min-boost-factor", 1, "Keyframe min boost");
+static const arg_def_t kf_frame_max_boost_first_factor =
+ ARG_DEF(NULL, "kf-frame-max-boost-first-factor", 1,
+ "Max keyframe boost adjustment factor for first frame");
+static const arg_def_t kf_frame_max_boost_subs_factor =
+ ARG_DEF(NULL, "kf-frame-max-boost-subs-factor", 1,
+ "Max boost adjustment factor for subsequent KFs");
+static const arg_def_t kf_max_total_boost_factor = ARG_DEF(
+ NULL, "kf-max-total-boost-factor", 1, "Keyframe max total boost factor");
+static const arg_def_t gf_max_total_boost_factor =
+ ARG_DEF(NULL, "gf-max-total-boost-factor", 1,
+ "Golden frame max total boost factor");
+static const arg_def_t gf_frame_max_boost_factor =
+ ARG_DEF(NULL, "gf-frame-max-boost-factor", 1,
+ "Golden frame max per frame boost factor");
+static const arg_def_t zm_factor =
+ ARG_DEF(NULL, "zm-factor", 1, "Zero motion power factor");
+static const arg_def_t rd_mult_inter_qp_fac =
+ ARG_DEF(NULL, "rd-mult-inter-qp-fac", 1,
+ "RD multiplier adjustment for inter frames");
+static const arg_def_t rd_mult_arf_qp_fac =
+ ARG_DEF(NULL, "rd-mult-arf-qp-fac", 1,
+ "RD multiplier adjustment for alt-ref frames");
+static const arg_def_t rd_mult_key_qp_fac = ARG_DEF(
+ NULL, "rd-mult-key-qp-fac", 1, "RD multiplier adjustment for key frames");
+static const arg_def_t *vizier_rc_args[] = { &use_vizier_rc_params,
+ &active_wq_factor,
+ &err_per_mb_factor,
+ &sr_default_decay_limit,
+ &sr_diff_factor,
+ &kf_err_per_mb_factor,
+ &kf_frame_min_boost_factor,
+ &kf_frame_max_boost_first_factor,
+ &kf_frame_max_boost_subs_factor,
+ &kf_max_total_boost_factor,
+ &gf_max_total_boost_factor,
+ &gf_frame_max_boost_factor,
+ &zm_factor,
+ &rd_mult_inter_qp_fac,
+ &rd_mult_arf_qp_fac,
+ &rd_mult_key_qp_fac,
+ NULL };
+#endif
+
static const arg_def_t bias_pct =
ARG_DEF(NULL, "bias-pct", 1, "CBR/VBR bias (0=CBR, 100=VBR)");
static const arg_def_t minsection_pct =
@@ -573,6 +627,8 @@
#if CONFIG_VP9_ENCODER
fprintf(fout, "\nVP9 Specific Options:\n");
arg_show_usage(fout, vp9_args);
+ fprintf(fout, "\nVizier Rate Control Options:\n");
+ arg_show_usage(fout, vizier_rc_args);
#endif
fprintf(fout,
"\nStream timebase (--timebase):\n"
@@ -614,9 +670,6 @@
struct vpx_codec_enc_cfg cfg;
const char *out_fn;
const char *stats_fn;
-#if CONFIG_FP_MB_STATS
- const char *fpmb_stats_fn;
-#endif
stereo_format_t stereo_fmt;
int arg_ctrls[ARG_CTRL_CNT_MAX][2];
int arg_ctrl_cnt;
@@ -644,9 +697,6 @@
uint64_t cx_time;
size_t nbytes;
stats_io_t stats;
-#if CONFIG_FP_MB_STATS
- stats_io_t fpmb_stats;
-#endif
struct vpx_image *img;
vpx_codec_ctx_t decoder;
int mismatch_seen;
@@ -883,10 +933,6 @@
config->out_fn = arg.val;
} else if (arg_match(&arg, &fpf_name, argi)) {
config->stats_fn = arg.val;
-#if CONFIG_FP_MB_STATS
- } else if (arg_match(&arg, &fpmbf_name, argi)) {
- config->fpmb_stats_fn = arg.val;
-#endif
} else if (arg_match(&arg, &use_webm, argi)) {
#if CONFIG_WEBM_IO
config->write_webm = 1;
@@ -983,6 +1029,40 @@
config->cfg.kf_max_dist = arg_parse_uint(&arg);
} else if (arg_match(&arg, &kf_disabled, argi)) {
config->cfg.kf_mode = VPX_KF_DISABLED;
+#if CONFIG_VP9_ENCODER
+ } else if (arg_match(&arg, &use_vizier_rc_params, argi)) {
+ config->cfg.use_vizier_rc_params = arg_parse_int(&arg);
+ } else if (arg_match(&arg, &active_wq_factor, argi)) {
+ config->cfg.active_wq_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &err_per_mb_factor, argi)) {
+ config->cfg.err_per_mb_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &sr_default_decay_limit, argi)) {
+ config->cfg.sr_default_decay_limit = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &sr_diff_factor, argi)) {
+ config->cfg.sr_diff_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &kf_err_per_mb_factor, argi)) {
+ config->cfg.kf_err_per_mb_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &kf_frame_min_boost_factor, argi)) {
+ config->cfg.kf_frame_min_boost_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &kf_frame_max_boost_first_factor, argi)) {
+ config->cfg.kf_frame_max_boost_first_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &kf_frame_max_boost_subs_factor, argi)) {
+ config->cfg.kf_frame_max_boost_subs_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &kf_max_total_boost_factor, argi)) {
+ config->cfg.kf_max_total_boost_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &gf_max_total_boost_factor, argi)) {
+ config->cfg.gf_max_total_boost_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &gf_frame_max_boost_factor, argi)) {
+ config->cfg.gf_frame_max_boost_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &zm_factor, argi)) {
+ config->cfg.zm_factor = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &rd_mult_inter_qp_fac, argi)) {
+ config->cfg.rd_mult_inter_qp_fac = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &rd_mult_arf_qp_fac, argi)) {
+ config->cfg.rd_mult_arf_qp_fac = arg_parse_rational(&arg);
+ } else if (arg_match(&arg, &rd_mult_key_qp_fac, argi)) {
+ config->cfg.rd_mult_key_qp_fac = arg_parse_rational(&arg);
+#endif
#if CONFIG_VP9_HIGHBITDEPTH
} else if (arg_match(&arg, &test16bitinternalarg, argi)) {
if (strcmp(global->codec->name, "vp9") == 0) {
@@ -1075,17 +1155,6 @@
fatal("Stream %d: duplicate stats file (from stream %d)",
streami->index, stream->index);
}
-
-#if CONFIG_FP_MB_STATS
- /* Check for two streams sharing a mb stats file. */
- if (streami != stream) {
- const char *a = stream->config.fpmb_stats_fn;
- const char *b = streami->config.fpmb_stats_fn;
- if (a && b && !strcmp(a, b))
- fatal("Stream %d: duplicate mb stats file (from stream %d)",
- streami->index, stream->index);
- }
-#endif
}
}
@@ -1177,6 +1246,10 @@
SHOW(kf_mode);
SHOW(kf_min_dist);
SHOW(kf_max_dist);
+ // Temporary use for debug
+ SHOW(use_vizier_rc_params);
+ SHOW(active_wq_factor.num);
+ SHOW(active_wq_factor.den);
}
static void open_output_file(struct stream_state *stream,
@@ -1240,26 +1313,11 @@
fatal("Failed to open statistics store");
}
-#if CONFIG_FP_MB_STATS
- if (stream->config.fpmb_stats_fn) {
- if (!stats_open_file(&stream->fpmb_stats, stream->config.fpmb_stats_fn,
- pass))
- fatal("Failed to open mb statistics store");
- } else {
- if (!stats_open_mem(&stream->fpmb_stats, pass))
- fatal("Failed to open mb statistics store");
- }
-#endif
-
stream->config.cfg.g_pass = global->passes == 2
? pass ? VPX_RC_LAST_PASS : VPX_RC_FIRST_PASS
: VPX_RC_ONE_PASS;
if (pass) {
stream->config.cfg.rc_twopass_stats_in = stats_get(&stream->stats);
-#if CONFIG_FP_MB_STATS
- stream->config.cfg.rc_firstpass_mb_stats_in =
- stats_get(&stream->fpmb_stats);
-#endif
}
stream->cx_time = 0;
@@ -1471,13 +1529,6 @@
pkt->data.twopass_stats.sz);
stream->nbytes += pkt->data.raw.sz;
break;
-#if CONFIG_FP_MB_STATS
- case VPX_CODEC_FPMB_STATS_PKT:
- stats_write(&stream->fpmb_stats, pkt->data.firstpass_mb_stats.buf,
- pkt->data.firstpass_mb_stats.sz);
- stream->nbytes += pkt->data.raw.sz;
- break;
-#endif
case VPX_CODEC_PSNR_PKT:
if (global->show_psnr) {
@@ -1636,6 +1687,7 @@
int res = 0;
memset(&input, 0, sizeof(input));
+ memset(&raw, 0, sizeof(raw));
exec_name = argv_[0];
/* Setup default input stream settings */
@@ -1781,14 +1833,10 @@
FOREACH_STREAM(show_stream_config(stream, &global, &input));
if (pass == (global.pass ? global.pass - 1 : 0)) {
- if (input.file_type == FILE_TYPE_Y4M)
- /*The Y4M reader does its own allocation.
- Just initialize this here to avoid problems if we never read any
- frames.*/
- memset(&raw, 0, sizeof(raw));
- else
+ // The Y4M reader does its own allocation.
+ if (input.file_type != FILE_TYPE_Y4M) {
vpx_img_alloc(&raw, input.fmt, input.width, input.height, 32);
-
+ }
FOREACH_STREAM(stream->rate_hist = init_rate_histogram(
&stream->config.cfg, &global.framerate));
}
@@ -1973,10 +2021,6 @@
FOREACH_STREAM(close_output_file(stream, global.codec->fourcc));
FOREACH_STREAM(stats_close(&stream->stats, global.passes - 1));
-
-#if CONFIG_FP_MB_STATS
- FOREACH_STREAM(stats_close(&stream->fpmb_stats, global.passes - 1));
-#endif
if (global.pass) break;
}
--- a/y4minput.c
+++ b/y4minput.c
@@ -10,6 +10,7 @@
* Based on code from the OggTheora software codec source code,
* Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
*/
+#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
@@ -52,15 +53,8 @@
}
static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
- int got_w;
- int got_h;
- int got_fps;
- int got_interlace;
- int got_par;
- int got_chroma;
char *p;
char *q;
- got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
for (p = _tags;; p = q) {
/*Skip any leading spaces.*/
while (*p == ' ') p++;
@@ -73,12 +67,10 @@
switch (p[0]) {
case 'W': {
if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1) return -1;
- got_w = 1;
break;
}
case 'H': {
if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1) return -1;
- got_h = 1;
break;
}
case 'F': {
@@ -85,12 +77,10 @@
if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
return -1;
}
- got_fps = 1;
break;
}
case 'I': {
_y4m->interlace = p[1];
- got_interlace = 1;
break;
}
case 'A': {
@@ -97,7 +87,6 @@
if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
return -1;
}
- got_par = 1;
break;
}
case 'C': {
@@ -104,21 +93,93 @@
if (q - p > 16) return -1;
memcpy(_y4m->chroma_type, p + 1, q - p - 1);
_y4m->chroma_type[q - p - 1] = '\0';
- got_chroma = 1;
break;
}
/*Ignore unknown tags.*/
}
}
- if (!got_w || !got_h || !got_fps) return -1;
- if (!got_interlace) _y4m->interlace = '?';
- if (!got_par) _y4m->par_n = _y4m->par_d = 0;
- /*Chroma-type is not specified in older files, e.g., those generated by
- mplayer.*/
- if (!got_chroma) strcpy(_y4m->chroma_type, "420");
return 0;
}
+// Copy a single tag into the buffer, along with a null character.
+// Returns 0 if any file IO errors occur.
+static int copy_tag(char *buf, size_t buf_len, char *end_tag, FILE *file) {
+ size_t i;
+ assert(buf_len >= 1);
+ // Skip leading space characters.
+ do {
+ if (!file_read(buf, 1, file)) {
+ return 0;
+ }
+ } while (buf[0] == ' ');
+
+ // If we hit the newline, treat this as the "empty" tag.
+ if (buf[0] == '\n') {
+ buf[0] = '\0';
+ *end_tag = '\n';
+ return 1;
+ }
+
+ // Copy over characters until a space is hit, or the buffer is exhausted.
+ for (i = 1; i < buf_len; ++i) {
+ if (!file_read(buf + i, 1, file)) {
+ return 0;
+ }
+ if (buf[i] == ' ' || buf[i] == '\n') {
+ break;
+ }
+ }
+ if (i == buf_len) {
+ fprintf(stderr, "Error: Y4M header tags must be less than %lu characters\n",
+ (unsigned long)i);
+ return 0;
+ }
+ *end_tag = buf[i];
+ buf[i] = '\0';
+ return 1;
+}
+
+/* Returns 1 if tags were parsed successfully, 0 otherwise. */
+static int parse_tags(y4m_input *y4m_ctx, FILE *file) {
+ char tag[256];
+ char end; /* Character denoting the end of the tag, ' ' or '\n'. */
+ /* Set Y4M tags to defaults, updating them as processing occurs. Mandatory
+ fields are marked with -1 and will be checked after the tags are parsed. */
+ y4m_ctx->pic_w = -1;
+ y4m_ctx->pic_h = -1;
+ y4m_ctx->fps_n = -1; /* Also serves as marker for fps_d */
+ y4m_ctx->par_n = 0;
+ y4m_ctx->par_d = 0;
+ y4m_ctx->interlace = '?';
+ snprintf(y4m_ctx->chroma_type, sizeof(y4m_ctx->chroma_type), "420");
+
+ /* Find one tag at a time. */
+ do {
+ if (!copy_tag(tag, sizeof(tag), &end, file)) {
+ return 0;
+ }
+ /* y4m_parse_tags returns 0 on success. */
+ if (y4m_parse_tags(y4m_ctx, tag)) {
+ return 0;
+ }
+ } while (end != '\n');
+
+ /* Check the mandatory fields. */
+ if (y4m_ctx->pic_w == -1) {
+ fprintf(stderr, "Width field missing\n");
+ return 0;
+ }
+ if (y4m_ctx->pic_h == -1) {
+ fprintf(stderr, "Height field missing\n");
+ return 0;
+ }
+ if (y4m_ctx->fps_n == -1) {
+ fprintf(stderr, "FPS field missing\n");
+ return 0;
+ }
+ return 1;
+}
+
/*All anti-aliasing filters in the following conversion functions are based on
one of two window functions:
The 6-tap Lanczos window (for down-sampling and shifts):
@@ -225,26 +286,6 @@
}
}
-/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
-static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
- unsigned char *_aux) {
- int c_w;
- int c_h;
- int c_sz;
- int pli;
- /*Skip past the luma data.*/
- _dst += _y4m->pic_w * _y4m->pic_h;
- /*Compute the size of each chroma plane.*/
- c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
- c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
- c_sz = c_w * c_h;
- for (pli = 1; pli < 3; pli++) {
- y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
- _dst += c_sz;
- _aux += c_sz;
- }
-}
-
/*This format is only used for interlaced content, but is included for
completeness.
@@ -785,277 +826,271 @@
(void)_aux;
}
-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
- int only_420) {
- char buffer[80] = { 0 };
- int ret;
- int i;
- /*Read until newline, or 80 cols, whichever happens first.*/
- for (i = 0; i < 79; i++) {
- if (_nskip > 0) {
- buffer[i] = *_skip++;
- _nskip--;
- } else {
- if (!file_read(buffer + i, 1, _fin)) return -1;
- }
- if (buffer[i] == '\n') break;
+static const char TAG[] = "YUV4MPEG2";
+
+int y4m_input_open(y4m_input *y4m_ctx, FILE *file, char *skip_buffer,
+ int num_skip, int only_420) {
+ // File must start with |TAG|.
+ char tag_buffer[9]; // 9 == strlen(TAG)
+ // Consume |skip_buffer| first: it holds the characters that were
+ // already read from the file while doing input-type detection.
+ assert(num_skip >= 0 && num_skip <= 8);
+ if (num_skip > 0) {
+ memcpy(tag_buffer, skip_buffer, num_skip);
}
- /*We skipped too much header data.*/
- if (_nskip > 0) return -1;
- if (i == 79) {
- fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n");
+ // Start reading from the file now that the |skip_buffer| is depleted.
+ if (!file_read(tag_buffer + num_skip, 9 - num_skip, file)) {
return -1;
}
- buffer[i] = '\0';
- if (memcmp(buffer, "YUV4MPEG", 8)) {
- fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n");
+ if (memcmp(TAG, tag_buffer, 9) != 0) {
+ fprintf(stderr, "Error parsing header: must start with %s\n", TAG);
return -1;
}
- if (buffer[8] != '2') {
- fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n");
+ // Next character must be a space.
+ if (!file_read(tag_buffer, 1, file) || tag_buffer[0] != ' ') {
+ fprintf(stderr, "Error parsing header: space must follow %s\n", TAG);
+ return -1;
}
- ret = y4m_parse_tags(_y4m, buffer + 5);
- if (ret < 0) {
- fprintf(stderr, "Error parsing YUV4MPEG2 header.\n");
- return ret;
+ if (!parse_tags(y4m_ctx, file)) { /* Fail the open; fields may be unset. */
+ fprintf(stderr, "Error parsing %s header.\n", TAG); return -1;
 }
- if (_y4m->interlace == '?') {
+ if (y4m_ctx->interlace == '?') {
fprintf(stderr,
"Warning: Input video interlacing format unknown; "
"assuming progressive scan.\n");
- } else if (_y4m->interlace != 'p') {
+ } else if (y4m_ctx->interlace != 'p') {
fprintf(stderr,
"Input video is interlaced; "
"Only progressive scan handled.\n");
return -1;
}
- _y4m->vpx_fmt = VPX_IMG_FMT_I420;
- _y4m->bps = 12;
- _y4m->bit_depth = 8;
- if (strcmp(_y4m->chroma_type, "420") == 0 ||
- strcmp(_y4m->chroma_type, "420jpeg") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz =
- _y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I420;
+ y4m_ctx->bps = 12;
+ y4m_ctx->bit_depth = 8;
+ y4m_ctx->aux_buf = NULL;
+ y4m_ctx->dst_buf = NULL;
+ if (strcmp(y4m_ctx->chroma_type, "420") == 0 ||
+ strcmp(y4m_ctx->chroma_type, "420jpeg") == 0 ||
+ strcmp(y4m_ctx->chroma_type, "420mpeg2") == 0) {
+ y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_v =
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz =
+ y4m_ctx->pic_w * y4m_ctx->pic_h +
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2);
/* Natively supported: no conversion required. */
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- } else if (strcmp(_y4m->chroma_type, "420p10") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz =
- 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2));
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
+ } else if (strcmp(y4m_ctx->chroma_type, "420p10") == 0) {
+ y4m_ctx->src_c_dec_h = 2;
+ y4m_ctx->dst_c_dec_h = 2;
+ y4m_ctx->src_c_dec_v = 2;
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz =
+ 2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2));
/* Natively supported: no conversion required. */
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- _y4m->bit_depth = 10;
- _y4m->bps = 15;
- _y4m->vpx_fmt = VPX_IMG_FMT_I42016;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
+ y4m_ctx->bit_depth = 10;
+ y4m_ctx->bps = 15;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42016;
if (only_420) {
fprintf(stderr, "Unsupported conversion from 420p10 to 420jpeg\n");
return -1;
}
- } else if (strcmp(_y4m->chroma_type, "420p12") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz =
- 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2));
+ } else if (strcmp(y4m_ctx->chroma_type, "420p12") == 0) {
+ y4m_ctx->src_c_dec_h = 2;
+ y4m_ctx->dst_c_dec_h = 2;
+ y4m_ctx->src_c_dec_v = 2;
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz =
+ 2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2));
/* Natively supported: no conversion required. */
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
- _y4m->bit_depth = 12;
- _y4m->bps = 18;
- _y4m->vpx_fmt = VPX_IMG_FMT_I42016;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
+ y4m_ctx->bit_depth = 12;
+ y4m_ctx->bps = 18;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42016;
if (only_420) {
fprintf(stderr, "Unsupported conversion from 420p12 to 420jpeg\n");
return -1;
}
- } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
- /*Chroma filter required: read into the aux buf first.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- _y4m->convert = y4m_convert_42xmpeg2_42xjpeg;
- } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v =
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+ } else if (strcmp(y4m_ctx->chroma_type, "420paldv") == 0) {
+ y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_v =
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- _y4m->aux_buf_read_sz =
- 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2);
- _y4m->convert = y4m_convert_42xpaldv_42xjpeg;
- } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) {
- _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+ y4m_ctx->aux_buf_sz =
+ 3 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2);
+ y4m_ctx->aux_buf_read_sz =
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * ((y4m_ctx->pic_h + 1) / 2);
+ y4m_ctx->convert = y4m_convert_42xpaldv_42xjpeg;
+ } else if (strcmp(y4m_ctx->chroma_type, "422jpeg") == 0) {
+ y4m_ctx->src_c_dec_h = y4m_ctx->dst_c_dec_h = 2;
+ y4m_ctx->src_c_dec_v = 1;
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;
/*Chroma filter required: read into the aux buf first.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz =
- 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_422jpeg_420jpeg;
- } else if (strcmp(_y4m->chroma_type, "422") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz =
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;
+ y4m_ctx->convert = y4m_convert_422jpeg_420jpeg;
+ } else if (strcmp(y4m_ctx->chroma_type, "422") == 0) {
+ y4m_ctx->src_c_dec_h = 2;
+ y4m_ctx->src_c_dec_v = 1;
if (only_420) {
- _y4m->dst_c_dec_h = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+ y4m_ctx->dst_c_dec_h = 2;
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->aux_buf_sz =
- _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_422_420jpeg;
+ y4m_ctx->aux_buf_read_sz =
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz +
+ ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;
+ y4m_ctx->convert = y4m_convert_422_420jpeg;
} else {
- _y4m->vpx_fmt = VPX_IMG_FMT_I422;
- _y4m->bps = 16;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz =
- _y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I422;
+ y4m_ctx->bps = 16;
+ y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;
+ y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;
+ y4m_ctx->dst_buf_read_sz =
+ y4m_ctx->pic_w * y4m_ctx->pic_h +
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;
/*Natively supported: no conversion required.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
}
- } else if (strcmp(_y4m->chroma_type, "422p10") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->vpx_fmt = VPX_IMG_FMT_I42216;
- _y4m->bps = 20;
- _y4m->bit_depth = 10;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
+ } else if (strcmp(y4m_ctx->chroma_type, "422p10") == 0) {
+ y4m_ctx->src_c_dec_h = 2;
+ y4m_ctx->src_c_dec_v = 1;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42216;
+ y4m_ctx->bps = 20;
+ y4m_ctx->bit_depth = 10;
+ y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;
+ y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;
+ y4m_ctx->dst_buf_read_sz =
+ 2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h);
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
if (only_420) {
fprintf(stderr, "Unsupported conversion from 422p10 to 420jpeg\n");
return -1;
}
- } else if (strcmp(_y4m->chroma_type, "422p12") == 0) {
- _y4m->src_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->vpx_fmt = VPX_IMG_FMT_I42216;
- _y4m->bps = 24;
- _y4m->bit_depth = 12;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h);
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
+ } else if (strcmp(y4m_ctx->chroma_type, "422p12") == 0) {
+ y4m_ctx->src_c_dec_h = 2;
+ y4m_ctx->src_c_dec_v = 1;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I42216;
+ y4m_ctx->bps = 24;
+ y4m_ctx->bit_depth = 12;
+ y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;
+ y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;
+ y4m_ctx->dst_buf_read_sz =
+ 2 * (y4m_ctx->pic_w * y4m_ctx->pic_h +
+ 2 * ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h);
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
if (only_420) {
fprintf(stderr, "Unsupported conversion from 422p12 to 420jpeg\n");
return -1;
}
- } else if (strcmp(_y4m->chroma_type, "411") == 0) {
- _y4m->src_c_dec_h = 4;
- _y4m->dst_c_dec_h = 2;
- _y4m->src_c_dec_v = 1;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+ } else if (strcmp(y4m_ctx->chroma_type, "411") == 0) {
+ y4m_ctx->src_c_dec_h = 4;
+ y4m_ctx->dst_c_dec_h = 2;
+ y4m_ctx->src_c_dec_v = 1;
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h;
- _y4m->aux_buf_sz =
- _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_411_420jpeg;
+ y4m_ctx->aux_buf_read_sz = 2 * ((y4m_ctx->pic_w + 3) / 4) * y4m_ctx->pic_h;
+ y4m_ctx->aux_buf_sz =
+ y4m_ctx->aux_buf_read_sz + ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;
+ y4m_ctx->convert = y4m_convert_411_420jpeg;
fprintf(stderr, "Unsupported conversion from yuv 411\n");
return -1;
- } else if (strcmp(_y4m->chroma_type, "444") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
+ } else if (strcmp(y4m_ctx->chroma_type, "444") == 0) {
+ y4m_ctx->src_c_dec_h = 1;
+ y4m_ctx->src_c_dec_v = 1;
if (only_420) {
- _y4m->dst_c_dec_h = 2;
- _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+ y4m_ctx->dst_c_dec_h = 2;
+ y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;
/*Chroma filter required: read into the aux buf first.
We need to make two filter passes, so we need some extra space in the
aux buffer.*/
- _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h;
- _y4m->aux_buf_sz =
- _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h;
- _y4m->convert = y4m_convert_444_420jpeg;
+ y4m_ctx->aux_buf_read_sz = 2 * y4m_ctx->pic_w * y4m_ctx->pic_h;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz +
+ ((y4m_ctx->pic_w + 1) / 2) * y4m_ctx->pic_h;
+ y4m_ctx->convert = y4m_convert_444_420jpeg;
} else {
- _y4m->vpx_fmt = VPX_IMG_FMT_I444;
- _y4m->bps = 24;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I444;
+ y4m_ctx->bps = 24;
+ y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;
+ y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;
+ y4m_ctx->dst_buf_read_sz = 3 * y4m_ctx->pic_w * y4m_ctx->pic_h;
/*Natively supported: no conversion required.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
}
- } else if (strcmp(_y4m->chroma_type, "444p10") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
- _y4m->vpx_fmt = VPX_IMG_FMT_I44416;
- _y4m->bps = 30;
- _y4m->bit_depth = 10;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
+ } else if (strcmp(y4m_ctx->chroma_type, "444p10") == 0) {
+ y4m_ctx->src_c_dec_h = 1;
+ y4m_ctx->src_c_dec_v = 1;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I44416;
+ y4m_ctx->bps = 30;
+ y4m_ctx->bit_depth = 10;
+ y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;
+ y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;
+ y4m_ctx->dst_buf_read_sz = 2 * 3 * y4m_ctx->pic_w * y4m_ctx->pic_h;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
if (only_420) {
fprintf(stderr, "Unsupported conversion from 444p10 to 420jpeg\n");
return -1;
}
- } else if (strcmp(_y4m->chroma_type, "444p12") == 0) {
- _y4m->src_c_dec_h = 1;
- _y4m->src_c_dec_v = 1;
- _y4m->vpx_fmt = VPX_IMG_FMT_I44416;
- _y4m->bps = 36;
- _y4m->bit_depth = 12;
- _y4m->dst_c_dec_h = _y4m->src_c_dec_h;
- _y4m->dst_c_dec_v = _y4m->src_c_dec_v;
- _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h;
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_null;
+ } else if (strcmp(y4m_ctx->chroma_type, "444p12") == 0) {
+ y4m_ctx->src_c_dec_h = 1;
+ y4m_ctx->src_c_dec_v = 1;
+ y4m_ctx->vpx_fmt = VPX_IMG_FMT_I44416;
+ y4m_ctx->bps = 36;
+ y4m_ctx->bit_depth = 12;
+ y4m_ctx->dst_c_dec_h = y4m_ctx->src_c_dec_h;
+ y4m_ctx->dst_c_dec_v = y4m_ctx->src_c_dec_v;
+ y4m_ctx->dst_buf_read_sz = 2 * 3 * y4m_ctx->pic_w * y4m_ctx->pic_h;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_null;
if (only_420) {
fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n");
return -1;
}
- } else if (strcmp(_y4m->chroma_type, "mono") == 0) {
- _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0;
- _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2;
- _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h;
+ } else if (strcmp(y4m_ctx->chroma_type, "mono") == 0) {
+ y4m_ctx->src_c_dec_h = y4m_ctx->src_c_dec_v = 0;
+ y4m_ctx->dst_c_dec_h = y4m_ctx->dst_c_dec_v = 2;
+ y4m_ctx->dst_buf_read_sz = y4m_ctx->pic_w * y4m_ctx->pic_h;
/*No extra space required, but we need to clear the chroma planes.*/
- _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0;
- _y4m->convert = y4m_convert_mono_420jpeg;
+ y4m_ctx->aux_buf_sz = y4m_ctx->aux_buf_read_sz = 0;
+ y4m_ctx->convert = y4m_convert_mono_420jpeg;
} else {
- fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type);
+ fprintf(stderr, "Unknown chroma sampling type: %s\n", y4m_ctx->chroma_type);
return -1;
}
/*The size of the final frame buffers is always computed from the
destination chroma decimation type.*/
- _y4m->dst_buf_sz =
- _y4m->pic_w * _y4m->pic_h +
- 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) *
- ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v);
- if (_y4m->bit_depth == 8)
- _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz);
+ y4m_ctx->dst_buf_sz =
+ y4m_ctx->pic_w * y4m_ctx->pic_h +
+ 2 * ((y4m_ctx->pic_w + y4m_ctx->dst_c_dec_h - 1) / y4m_ctx->dst_c_dec_h) *
+ ((y4m_ctx->pic_h + y4m_ctx->dst_c_dec_v - 1) / y4m_ctx->dst_c_dec_v);
+ if (y4m_ctx->bit_depth == 8)
+ y4m_ctx->dst_buf = (unsigned char *)malloc(y4m_ctx->dst_buf_sz);
else
- _y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz);
+ y4m_ctx->dst_buf = (unsigned char *)malloc(2 * y4m_ctx->dst_buf_sz);
- if (_y4m->aux_buf_sz > 0)
- _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz);
+ if (y4m_ctx->aux_buf_sz > 0)
+ y4m_ctx->aux_buf = (unsigned char *)malloc(y4m_ctx->aux_buf_sz);
return 0;
}
--- a/y4minput.h
+++ b/y4minput.h
@@ -56,8 +56,16 @@
unsigned int bit_depth;
};
-int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
- int only_420);
+/**
+ * Open the input file, treating it as Y4M. |y4m_ctx| is filled in after
+ * reading it. The |skip_buffer| indicates bytes that were previously read
+ * from |file|, to do input-type detection; this buffer will be read before
+ * the |file| is read. It is of size |num_skip|, which *must* be 8 or less.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int y4m_input_open(y4m_input *y4m_ctx, FILE *file, char *skip_buffer,
+ int num_skip, int only_420);
void y4m_input_close(y4m_input *_y4m);
int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);