ref: 4e2cfb63de974939b72d990005b36d8c2fac8352
parent: 97cd0bd5dbe2ad4f64fcaedaeb48d60d7874bfde
parent: fad865c54ad99ada3e53ea4a290688f0c56aa15d
author: James Zern <jzern@google.com>
date: Mon Sep 30 17:53:34 EDT 2019
Merge "namespace ARCH_* defines"
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -430,10 +430,10 @@
DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh
DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh
DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
- DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas.pl
- DIST-SRCS-$(ARCH_ARM) += build/make/ads2gas_apple.pl
- DIST-SRCS-$(ARCH_ARM) += build/make/ads2armasm_ms.pl
- DIST-SRCS-$(ARCH_ARM) += build/make/thumb.pm
+ DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2gas.pl
+ DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2gas_apple.pl
+ DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2armasm_ms.pl
+ DIST-SRCS-$(VPX_ARCH_ARM) += build/make/thumb.pm
DIST-SRCS-yes += $(target:-$(TOOLCHAIN)=).mk
endif
INSTALL-SRCS := $(call cond_enabled,CONFIG_INSTALL_SRCS,INSTALL-SRCS)
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -518,10 +518,10 @@
EOF
fi
- print_config_mk ARCH "${1}" ${ARCH_LIST}
- print_config_mk HAVE "${1}" ${HAVE_LIST}
- print_config_mk CONFIG "${1}" ${CONFIG_LIST}
- print_config_mk HAVE "${1}" gnu_strip
+ print_config_mk VPX_ARCH "${1}" ${ARCH_LIST}
+ print_config_mk HAVE "${1}" ${HAVE_LIST}
+ print_config_mk CONFIG "${1}" ${CONFIG_LIST}
+ print_config_mk HAVE "${1}" gnu_strip
enabled msvs && echo "CONFIG_VS_VERSION=${vs_version}" >> "${1}"
@@ -538,10 +538,10 @@
#define RESTRICT ${RESTRICT}
#define INLINE ${INLINE}
EOF
- print_config_h ARCH "${TMP_H}" ${ARCH_LIST}
- print_config_h HAVE "${TMP_H}" ${HAVE_LIST}
- print_config_h CONFIG "${TMP_H}" ${CONFIG_LIST}
- print_config_vars_h "${TMP_H}" ${VAR_LIST}
+ print_config_h VPX_ARCH "${TMP_H}" ${ARCH_LIST}
+ print_config_h HAVE "${TMP_H}" ${HAVE_LIST}
+ print_config_h CONFIG "${TMP_H}" ${CONFIG_LIST}
+ print_config_vars_h "${TMP_H}" ${VAR_LIST}
echo "#endif /* VPX_CONFIG_H */" >> ${TMP_H}
mkdir -p `dirname "$1"`
cmp "$1" ${TMP_H} >/dev/null 2>&1 || mv ${TMP_H} "$1"
--- a/libs.mk
+++ b/libs.mk
@@ -11,7 +11,7 @@
# ARM assembly files are written in RVCT-style. We use some make magic to
# filter those files to allow GCC compilation
-ifeq ($(ARCH_ARM),yes)
+ifeq ($(VPX_ARCH_ARM),yes)
ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm)
else
ASM:=.asm
@@ -139,7 +139,7 @@
CODEC_SRCS-yes += vpx_ports/vpx_once.h
CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
-ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes)
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += vpx_dsp/x86/bitdepth_conversion_sse2.asm
endif
@@ -347,7 +347,7 @@
#
# Rule to make assembler configuration file from C configuration file
#
-ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes)
# YASM
$(BUILD_PFX)vpx_config.asm: $(BUILD_PFX)vpx_config.h
@echo " [CREATE] $@"
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -1133,7 +1133,7 @@
x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \
}
-#if HAVE_SSE2 && ARCH_X86_64
+#if HAVE_SSE2 && VPX_ARCH_X86_64
WRAP(convolve_copy_sse2, 8)
WRAP(convolve_avg_sse2, 8)
WRAP(convolve_copy_sse2, 10)
@@ -1158,7 +1158,7 @@
WRAP(convolve8_avg_vert_sse2, 12)
WRAP(convolve8_sse2, 12)
WRAP(convolve8_avg_sse2, 12)
-#endif // HAVE_SSE2 && ARCH_X86_64
+#endif // HAVE_SSE2 && VPX_ARCH_X86_64
#if HAVE_AVX2
WRAP(convolve_copy_avx2, 8)
@@ -1278,7 +1278,7 @@
#endif
INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c));
-#if HAVE_SSE2 && ARCH_X86_64
+#if HAVE_SSE2 && VPX_ARCH_X86_64
#if CONFIG_VP9_HIGHBITDEPTH
const ConvolveFunctions convolve8_sse2(
wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8,
--- a/test/dct_test.cc
+++ b/test/dct_test.cc
@@ -514,7 +514,7 @@
::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12)));
#endif // HAVE_SSE2
-#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
// vpx_fdct8x8_ssse3 is only available in 64 bit builds.
static const FuncInfo dct_ssse3_func_info = {
&fdct_wrapper<vpx_fdct8x8_ssse3>, &idct_wrapper<vpx_idct8x8_64_add_sse2>, 8, 1
@@ -524,7 +524,7 @@
INSTANTIATE_TEST_CASE_P(SSSE3, TransDCT,
::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0,
VPX_BITS_8)));
-#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+#endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
#if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH
static const FuncInfo dct_avx2_func_info = {
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -737,7 +737,7 @@
make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
#endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
+#if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
!CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -254,11 +254,11 @@
HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32)));
#endif // HAVE_AVX2
-#if HAVE_SSSE3 && ARCH_X86_64
+#if HAVE_SSSE3 && VPX_ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
SSSE3, HadamardLowbdTest,
::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8)));
-#endif // HAVE_SSSE3 && ARCH_X86_64
+#endif // HAVE_SSSE3 && VPX_ARCH_X86_64
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -147,7 +147,7 @@
// This file will cause a large allocation which is expected to fail in 32-bit
// environments. Test x86 for coverage purposes as the allocation failure will
// be in platform agnostic code.
-#if ARCH_X86
+#if VPX_ARCH_X86
{ 1, "invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf" },
#endif
{ 1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf" },
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -28,7 +28,7 @@
// See platform implementations of RegisterStateCheckXXX for details.
//
-#if defined(_WIN64) && ARCH_X86_64
+#if defined(_WIN64) && VPX_ARCH_X86_64
#undef NOMINMAX
#define NOMINMAX
@@ -138,9 +138,9 @@
} // namespace libvpx_test
-#endif // _WIN64 && ARCH_X86_64
+#endif // _WIN64 && VPX_ARCH_X86_64
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
#if defined(__GNUC__)
namespace libvpx_test {
@@ -178,7 +178,7 @@
} // namespace libvpx_test
#endif // __GNUC__
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
#ifndef API_REGISTER_STATE_CHECK
#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -12,7 +12,7 @@
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "./vpx_config.h"
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
#include "vpx_ports/x86.h"
#endif
extern "C" {
@@ -26,7 +26,7 @@
extern void vpx_scale_rtcd();
}
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
static void append_negative_gtest_filter(const char *str) {
std::string filter = ::testing::FLAGS_gtest_filter;
// Negative patterns begin with one '-' followed by a ':' separated list.
@@ -34,12 +34,12 @@
filter += str;
::testing::FLAGS_gtest_filter = filter;
}
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
int main(int argc, char **argv) {
::testing::InitGoogleTest(&argc, argv);
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
const int simd_caps = x86_simd_caps();
if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter(":MMX.*:MMX/*");
if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter(":SSE.*:SSE/*");
@@ -56,7 +56,7 @@
if (!(simd_caps & HAS_AVX512)) {
append_negative_gtest_filter(":AVX512.*:AVX512/*");
}
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
#if !CONFIG_SHARED
// Shared library builds don't support whitebox tests
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -505,7 +505,7 @@
#endif // HAVE_SSE2
#if HAVE_SSSE3
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
INSTANTIATE_TEST_CASE_P(
SSSE3, VP9QuantizeTest,
::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
@@ -528,7 +528,7 @@
&vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
false)));
-#endif // ARCH_X86_64
+#endif // VPX_ARCH_X86_64
#endif // HAVE_SSSE3
#if HAVE_AVX
@@ -541,7 +541,7 @@
VPX_BITS_8, 32, false)));
#endif // HAVE_AVX
-#if ARCH_X86_64 && HAVE_AVX2
+#if VPX_ARCH_X86_64 && HAVE_AVX2
INSTANTIATE_TEST_CASE_P(
AVX2, VP9QuantizeTest,
::testing::Values(make_tuple(&QuantFPWrapper<vp9_quantize_fp_avx2>,
--- a/test/vpx_scale_test.cc
+++ b/test/vpx_scale_test.cc
@@ -22,7 +22,7 @@
namespace libvpx_test {
namespace {
-#if ARCH_ARM || (ARCH_MIPS && !HAVE_MIPS64) || ARCH_X86
+#if VPX_ARCH_ARM || (VPX_ARCH_MIPS && !HAVE_MIPS64) || VPX_ARCH_X86
// Avoid OOM failures on 32-bit platforms.
const int kNumSizesToTest = 7;
#else
--- a/third_party/x86inc/README.libvpx
+++ b/third_party/x86inc/README.libvpx
@@ -18,3 +18,4 @@
Use .text instead of .rodata on macho to avoid broken tables in PIC mode.
Use .text with no alignment for aout
Only use 'hidden' visibility with Chromium
+Prefix ARCH_* with VPX_.
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -45,7 +45,7 @@
%endif
%ifndef STACK_ALIGNMENT
- %if ARCH_X86_64
+ %if VPX_ARCH_X86_64
%define STACK_ALIGNMENT 16
%else
%define STACK_ALIGNMENT 4
@@ -54,7 +54,7 @@
%define WIN64 0
%define UNIX64 0
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%ifidn __OUTPUT_FORMAT__,win32
%define WIN64 1
%elifidn __OUTPUT_FORMAT__,win64
@@ -165,7 +165,7 @@
%endif
%endif
- %if ARCH_X86_64 == 0
+ %if VPX_ARCH_X86_64 == 0
%undef PIC
%endif
@@ -260,7 +260,7 @@
%if %0 == 2
%define r%1m %2d
%define r%1mp %2
- %elif ARCH_X86_64 ; memory
+ %elif VPX_ARCH_X86_64 ; memory
%define r%1m [rstk + stack_offset + %3]
%define r%1mp qword r %+ %1 %+ m
%else
@@ -281,7 +281,7 @@
%define e%1h %3
%define r%1b %2
%define e%1b %2
- %if ARCH_X86_64 == 0
+ %if VPX_ARCH_X86_64 == 0
%define r%1 e%1
%endif
%endmacro
@@ -318,7 +318,7 @@
DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define gprsize 8
%else
%define gprsize 4
@@ -485,7 +485,7 @@
%if %1 > 0
%assign regs_used (regs_used + 1)
%endif
- %if ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
+ %if VPX_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3
; Ensure that we don't clobber any registers containing arguments
%assign regs_used 5 + UNIX64 * 3
%endif
@@ -607,7 +607,7 @@
AUTO_REP_RET
%endmacro
-%elif ARCH_X86_64 ; *nix x64 ;=============================================
+%elif VPX_ARCH_X86_64 ; *nix x64 ;=============================================
DECLARE_REG 0, rdi
DECLARE_REG 1, rsi
@@ -948,7 +948,7 @@
%endif
%endif
- %if ARCH_X86_64 || cpuflag(sse2)
+ %if VPX_ARCH_X86_64 || cpuflag(sse2)
%ifdef __NASM_VER__
ALIGNMODE k8
%else
@@ -1005,7 +1005,7 @@
%define RESET_MM_PERMUTATION INIT_XMM %1
%define mmsize 16
%define num_mmregs 8
- %if ARCH_X86_64
+ %if VPX_ARCH_X86_64
%define num_mmregs 16
%endif
%define mova movdqa
@@ -1026,7 +1026,7 @@
%define RESET_MM_PERMUTATION INIT_YMM %1
%define mmsize 32
%define num_mmregs 8
- %if ARCH_X86_64
+ %if VPX_ARCH_X86_64
%define num_mmregs 16
%endif
%define mova movdqa
@@ -1637,7 +1637,7 @@
; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0)
%ifdef __YASM_VER__
- %if __YASM_VERSION_ID__ < 0x01030000 && ARCH_X86_64 == 0
+ %if __YASM_VERSION_ID__ < 0x01030000 && VPX_ARCH_X86_64 == 0
%macro vpbroadcastq 2
%if sizeof%1 == 16
movddup %1, %2
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -292,7 +292,7 @@
struct vpx_internal_error_info error_info;
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
/* This is an intermediate buffer currently used in sub-pixel motion search
* to keep a copy of the reference area. This buffer can be used for other
* purpose.
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -10,11 +10,11 @@
#include "vpx_config.h"
#include "vp8_rtcd.h"
-#if ARCH_ARM
+#if VPX_ARCH_ARM
#include "vpx_ports/arm.h"
-#elif ARCH_X86 || ARCH_X86_64
+#elif VPX_ARCH_X86 || VPX_ARCH_X86_64
#include "vpx_ports/x86.h"
-#elif ARCH_PPC
+#elif VPX_ARCH_PPC
#include "vpx_ports/ppc.h"
#endif
#include "vp8/common/onyxc_int.h"
@@ -92,11 +92,11 @@
(void)ctx;
#endif /* CONFIG_MULTITHREAD */
-#if ARCH_ARM
+#if VPX_ARCH_ARM
ctx->cpu_caps = arm_cpu_caps();
-#elif ARCH_X86 || ARCH_X86_64
+#elif VPX_ARCH_X86 || VPX_ARCH_X86_64
ctx->cpu_caps = x86_simd_caps();
-#elif ARCH_PPC
+#elif VPX_ARCH_PPC
ctx->cpu_caps = ppc_simd_caps();
#endif
}
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -26,7 +26,7 @@
typedef enum { NORMAL_LOOPFILTER = 0, SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE;
-#if ARCH_ARM
+#if VPX_ARCH_ARM
#define SIMD_WIDTH 1
#else
#define SIMD_WIDTH 16
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -185,7 +185,7 @@
#endif
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
#include "vpx_ports/x86.h"
#else
#define x86_pause_hint()
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -22,7 +22,7 @@
#define prototype_simple_loopfilter(sym) \
void sym(unsigned char *y, int ystride, const unsigned char *blimit)
-#if HAVE_SSE2 && ARCH_X86_64
+#if HAVE_SSE2 && VPX_ARCH_X86_64
prototype_loopfilter(vp8_loop_filter_bv_y_sse2);
prototype_loopfilter(vp8_loop_filter_bh_y_sse2);
#else
@@ -68,7 +68,7 @@
void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
2);
#else
@@ -101,7 +101,7 @@
void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr,
unsigned char *v_ptr, int y_stride, int uv_stride,
loop_filter_info *lfi) {
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr,
2);
#else
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -37,7 +37,7 @@
#if CONFIG_ERROR_CONCEALMENT
#include "error_concealment.h"
#endif
-#if ARCH_ARM
+#if VPX_ARCH_ARM
#include "vpx_ports/arm.h"
#endif
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -251,7 +251,7 @@
int pre_stride = x->e_mbd.pre.y_stride;
unsigned char *base_pre = x->e_mbd.pre.y_buffer;
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
bestmv->as_mv.col;
@@ -380,7 +380,7 @@
int pre_stride = x->e_mbd.pre.y_stride;
unsigned char *base_pre = x->e_mbd.pre.y_buffer;
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
bestmv->as_mv.col;
@@ -676,7 +676,7 @@
int pre_stride = x->e_mbd.pre.y_stride;
unsigned char *base_pre = x->e_mbd.pre.y_buffer;
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
MACROBLOCKD *xd = &x->e_mbd;
unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride +
bestmv->as_mv.col;
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -38,7 +38,7 @@
#include "vpx_ports/system_state.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_util/vpx_write_yuv_frame.h"
-#if ARCH_ARM
+#if VPX_ARCH_ARM
#include "vpx_ports/arm.h"
#endif
#if CONFIG_MULTI_RES_ENCODING
@@ -2043,7 +2043,7 @@
cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8;
cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d;
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn;
cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn;
cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn;
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -18,7 +18,7 @@
#include "vpx_scale/vpx_scale.h"
#include "vp8/common/alloccommon.h"
#include "vp8/common/loopfilter.h"
-#if ARCH_ARM
+#if VPX_ARCH_ARM
#include "vpx_ports/arm.h"
#endif
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -69,8 +69,8 @@
VP8_COMMON_SRCS-yes += common/treecoder.c
-VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
-VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
+VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/vp8_asm_stubs.c
+VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/loopfilter_x86.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/mfqe.c
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
@@ -92,7 +92,7 @@
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm
endif
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2_x86_64.asm
endif
--- a/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
+++ b/vp9/encoder/x86/vp9_diamond_search_sad_avx.c
@@ -114,7 +114,7 @@
// Work out the start point for the search
const uint8_t *best_address = in_what;
const uint8_t *new_best_address = best_address;
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
__m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
#else
__m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address);
@@ -138,7 +138,7 @@
for (i = 0, step = 0; step < tot_steps; step++) {
for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) {
__m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w;
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
__m128i v_blocka[2];
#else
__m128i v_blocka[1];
@@ -175,7 +175,7 @@
// Compute the SIMD pointer offsets.
{
-#if ARCH_X86_64 // sizeof(intptr_t) == 8
+#if VPX_ARCH_X86_64 // sizeof(intptr_t) == 8
// Load the offsets
__m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]);
__m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]);
@@ -186,7 +186,7 @@
// Compute the candidate addresses
v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q);
v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q);
-#else // ARCH_X86 // sizeof(intptr_t) == 4
+#else // VPX_ARCH_X86 // sizeof(intptr_t) == 4
__m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]);
v_bo_d = _mm_and_si128(v_bo_d, v_inside_d);
v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d);
@@ -294,7 +294,7 @@
best_address = new_best_address;
v_bmv_w = _mm_set1_epi32(bmv.as_int);
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
#else
v_ba_d = _mm_set1_epi32((intptr_t)best_address);
--- a/vp9/encoder/x86/vp9_error_sse2.asm
+++ b/vp9/encoder/x86/vp9_error_sse2.asm
@@ -58,7 +58,7 @@
movhlps m7, m6
paddq m4, m5
paddq m6, m7
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
movq rax, m4
movq [sszq], m6
%else
@@ -105,7 +105,7 @@
; accumulate horizontally and store in return value
movhlps m5, m4
paddq m4, m5
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
movq rax, m4
%else
pshufd m5, m4, 0x1
--- a/vp9/vp9cx.mk
+++ b/vp9/vp9cx.mk
@@ -118,7 +118,7 @@
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm
VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm
endif
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -173,7 +173,7 @@
return res;
}
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
/* On X86, disable the x87 unit's internal 80 bit precision for better
* consistency with the SSE unit's 64 bit precision.
*/
--- a/vpx_dsp/quantize.c
+++ b/vpx_dsp/quantize.c
@@ -260,7 +260,7 @@
15;
qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
-#if (ARCH_X86 || ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
+#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH
// When tran_low_t is only 16 bits dqcoeff can outrange it. Rather than
// truncating with a cast, saturate the value. This is easier to implement
// on x86 and preserves the sign of the value.
@@ -268,7 +268,7 @@
clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX);
#else
dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2;
-#endif // ARCH_X86 && CONFIG_VP9_HIGHBITDEPTH
+#endif // VPX_ARCH_X86 && CONFIG_VP9_HIGHBITDEPTH
if (tmp) eob = idx_arr[i];
}
--- a/vpx_dsp/variance.h
+++ b/vpx_dsp/variance.h
@@ -62,7 +62,7 @@
vpx_sad_multi_fn_t sdx3f;
vpx_sad_multi_fn_t sdx8f;
vpx_sad_multi_d_fn_t sdx4df;
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
vp8_copy32xn_fn_t copymem;
#endif
} vp8_variance_fn_ptr_t;
--- a/vpx_dsp/vpx_dsp.mk
+++ b/vpx_dsp/vpx_dsp.mk
@@ -87,7 +87,7 @@
DSP_SRCS-yes += vpx_convolve.c
DSP_SRCS-yes += vpx_convolve.h
-DSP_SRCS-$(ARCH_X86)$(ARCH_X86_64) += x86/convolve.h
+DSP_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += x86/convolve.h
DSP_SRCS-$(HAVE_SSE2) += x86/convolve_sse2.h
DSP_SRCS-$(HAVE_SSSE3) += x86/convolve_ssse3.h
@@ -205,7 +205,7 @@
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm
endif
DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c
@@ -316,7 +316,7 @@
DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c
DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c
DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm
endif
DSP_SRCS-$(HAVE_VSX) += ppc/hadamard_vsx.c
@@ -384,9 +384,9 @@
DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c
DSP_SRCS-$(HAVE_VSX) += ppc/variance_vsx.c
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm
-endif # ARCH_X86_64
+endif # VPX_ARCH_X86_64
DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3
--- a/vpx_dsp/x86/avg_ssse3_x86_64.asm
+++ b/vpx_dsp/x86/avg_ssse3_x86_64.asm
@@ -13,7 +13,7 @@
SECTION .text
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
; matrix transpose
%macro TRANSPOSE8X8 10
; stage 1
--- a/vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm
+++ b/vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm
@@ -27,7 +27,7 @@
SECTION .text
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
INIT_XMM ssse3
cglobal fdct8x8, 3, 5, 13, input, output, stride
--- a/vpx_dsp/x86/highbd_sad_sse2.asm
+++ b/vpx_dsp/x86/highbd_sad_sse2.asm
@@ -25,11 +25,11 @@
cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \
second_pred, n_rows
%else ; %3 == 7
-cglobal highbd_sad%1x%2_avg, 5, ARCH_X86_64 + %3, 7, src, src_stride, \
+cglobal highbd_sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 7, src, src_stride, \
ref, ref_stride, \
second_pred, \
src_stride3, ref_stride3
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define n_rowsd r7d
%else ; x86-32
%define n_rowsd dword r0m
--- a/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
+++ b/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm
@@ -78,7 +78,7 @@
%endmacro
%macro INC_SRC_BY_SRC_STRIDE 0
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
add srcq, src_stridemp
add srcq, src_stridemp
%else
@@ -91,7 +91,7 @@
%define filter_idx_shift 5
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%if %2 == 1 ; avg
cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
x_offset, y_offset, \
@@ -268,11 +268,11 @@
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if VPX_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -280,7 +280,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ; x86-32 or mmx
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0, reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -495,11 +495,11 @@
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if VPX_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+y_offsetq]
mova m9, [bilin_filter+y_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -507,7 +507,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ; x86_32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0.5. We can reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -617,11 +617,11 @@
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if VPX_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -629,7 +629,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; y_offset == 0. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -716,11 +716,11 @@
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if VPX_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [GLOBAL(pw_8)]
@@ -728,7 +728,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; y_offset == 0.5. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -843,12 +843,12 @@
.x_nonhalf_y_nonhalf:
; loading filter - this is same as in 8-bit depth
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && mmsize == 16
+%if VPX_ARCH_X86_64 && mmsize == 16
mova m8, [bilin_filter+x_offsetq]
mova m9, [bilin_filter+x_offsetq+16]
mova m10, [bilin_filter+y_offsetq]
@@ -860,7 +860,7 @@
%define filter_y_b m11
%define filter_rnd m12
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; In this case, there is NO unused register. Used src_stride register. Later,
; src_stride has to be loaded from stack when it is needed.
%define tempq src_strideq
--- a/vpx_dsp/x86/sad_sse2.asm
+++ b/vpx_dsp/x86/sad_sse2.asm
@@ -25,11 +25,11 @@
cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \
second_pred, n_rows
%else ; %3 == 7
-cglobal sad%1x%2_avg, 5, ARCH_X86_64 + %3, 6, src, src_stride, \
+cglobal sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 6, src, src_stride, \
ref, ref_stride, \
second_pred, \
src_stride3, ref_stride3
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define n_rowsd r7d
%else ; x86-32
%define n_rowsd dword r0m
--- a/vpx_dsp/x86/subpel_variance_sse2.asm
+++ b/vpx_dsp/x86/subpel_variance_sse2.asm
@@ -95,7 +95,7 @@
%endmacro
%macro INC_SRC_BY_SRC_STRIDE 0
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
add srcq, src_stridemp
%else
add srcq, src_strideq
@@ -114,7 +114,7 @@
; 11, not 13, if the registers are ordered correctly. May make a minor speed
; difference on Win64
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%if %2 == 1 ; avg
cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \
x_offset, y_offset, ref, ref_stride, \
@@ -352,11 +352,11 @@
.x_zero_y_nonhalf:
; x_offset == 0 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if VPX_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -366,7 +366,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ; x86-32 or mmx
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0, reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -675,11 +675,11 @@
.x_half_y_nonhalf:
; x_offset == 0.5 && y_offset == bilin interpolation
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if VPX_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+y_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+y_offsetq+16]
@@ -689,7 +689,7 @@
%define filter_y_b m9
%define filter_rnd m10
%else ;x86_32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; x_offset == 0.5. We can reuse x_offset reg
%define tempq x_offsetq
add y_offsetq, g_bilin_filterm
@@ -833,11 +833,11 @@
jnz .x_nonhalf_y_nonzero
; x_offset == bilin interpolation && y_offset == 0
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if VPX_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -847,7 +847,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
;y_offset == 0. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -975,11 +975,11 @@
jne .x_nonhalf_y_nonhalf
; x_offset == bilin interpolation && y_offset == 0.5
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if VPX_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -989,7 +989,7 @@
%define filter_x_b m9
%define filter_rnd m10
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; y_offset == 0.5. We can reuse y_offset reg.
%define tempq y_offsetq
add x_offsetq, g_bilin_filterm
@@ -1173,12 +1173,12 @@
STORE_AND_RET %1
.x_nonhalf_y_nonhalf:
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
lea bilin_filter, [GLOBAL(bilin_filter_m)]
%endif
shl x_offsetd, filter_idx_shift
shl y_offsetd, filter_idx_shift
-%if ARCH_X86_64 && %1 > 4
+%if VPX_ARCH_X86_64 && %1 > 4
mova m8, [bilin_filter+x_offsetq]
%if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64
mova m9, [bilin_filter+x_offsetq+16]
@@ -1194,7 +1194,7 @@
%define filter_y_b m11
%define filter_rnd m12
%else ; x86-32
-%if ARCH_X86=1 && CONFIG_PIC=1
+%if VPX_ARCH_X86=1 && CONFIG_PIC=1
; In this case, there is NO unused register. Used src_stride register. Later,
; src_stride has to be loaded from stack when it is needed.
%define tempq src_strideq
--- a/vpx_dsp/x86/sum_squares_sse2.c
+++ b/vpx_dsp/x86/sum_squares_sse2.c
@@ -92,7 +92,7 @@
v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8));
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
return (uint64_t)_mm_cvtsi128_si64(v_acc_q);
#else
{
--- a/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm
+++ b/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm
@@ -64,7 +64,7 @@
dec rcx
%endm
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%macro HIGH_GET_PARAM 0
mov rdx, arg(5) ;filter ptr
mov rsi, arg(0) ;src_ptr
@@ -197,7 +197,7 @@
pop rbp
ret
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_v2_sse2) PRIVATE
sym(vpx_highbd_filter_block1d8_v2_sse2):
push rbp
@@ -277,7 +277,7 @@
pop rbp
ret
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_v2_avg_sse2) PRIVATE
sym(vpx_highbd_filter_block1d8_v2_avg_sse2):
push rbp
@@ -358,7 +358,7 @@
pop rbp
ret
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_h2_sse2) PRIVATE
sym(vpx_highbd_filter_block1d8_h2_sse2):
push rbp
@@ -439,7 +439,7 @@
pop rbp
ret
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
global sym(vpx_highbd_filter_block1d8_h2_avg_sse2) PRIVATE
sym(vpx_highbd_filter_block1d8_h2_avg_sse2):
push rbp
--- a/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c
+++ b/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c
@@ -602,7 +602,7 @@
}
}
-#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+#if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
static void vpx_highbd_filter_block1d4_h4_sse2(
const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr,
ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) {
@@ -982,7 +982,7 @@
vpx_highbd_filter_block1d8_v4_sse2(src_ptr + 8, src_stride, dst_ptr + 8,
dst_stride, height, kernel, bd);
}
-#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+#endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
// From vpx_subpixel_8t_sse2.asm.
filter8_1dfunction vpx_filter_block1d16_v8_sse2;
@@ -1060,7 +1060,7 @@
FUN_CONV_2D(, sse2, 0);
FUN_CONV_2D(avg_, sse2, 1);
-#if CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+#if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
// From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm.
highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2;
@@ -1158,4 +1158,4 @@
// int y_step_q4, int w, int h, int bd);
HIGH_FUN_CONV_2D(, sse2, 0);
HIGH_FUN_CONV_2D(avg_, sse2, 1);
-#endif // CONFIG_VP9_HIGHBITDEPTH && ARCH_X86_64
+#endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c
@@ -894,7 +894,7 @@
#if HAVE_AVX2 && HAVE_SSSE3
filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
@@ -901,7 +901,7 @@
#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3
#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3
#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3
-#else // ARCH_X86
+#else // VPX_ARCH_X86
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
@@ -908,7 +908,7 @@
#define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3
#define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3
#define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3
-#endif // ARCH_X86_64
+#endif // VPX_ARCH_X86_64
filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3;
--- a/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
+++ b/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c
@@ -31,7 +31,7 @@
}
// Used by the avx2 implementation.
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
// Use the intrinsics below
filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
@@ -39,7 +39,7 @@
#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3
#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3
#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3
-#else // ARCH_X86
+#else // VPX_ARCH_X86
// Use the assembly in vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm.
filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
@@ -46,7 +46,7 @@
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
#endif
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
void vpx_filter_block1d4_h8_intrin_ssse3(
const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
@@ -198,7 +198,7 @@
output_ptr += out_pitch;
}
}
-#endif // ARCH_X86_64
+#endif // VPX_ARCH_X86_64
static void vpx_filter_block1d16_h4_ssse3(const uint8_t *src_ptr,
ptrdiff_t src_stride,
--- a/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
+++ b/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
@@ -26,7 +26,7 @@
%define LOCAL_VARS_SIZE 16*6
%macro SETUP_LOCAL_VARS 0
- ; TODO(slavarnway): using xmm registers for these on ARCH_X86_64 +
+ ; TODO(slavarnway): using xmm registers for these on VPX_ARCH_X86_64 +
; pmaddubsw has a higher latency on some platforms, this might be eased by
; interleaving the instructions.
%define k0k1 [rsp + 16*0]
@@ -48,7 +48,7 @@
mova k2k3, m1
mova k4k5, m2
mova k6k7, m3
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define krd m12
%define tmp0 [rsp + 16*4]
%define tmp1 [rsp + 16*5]
@@ -68,7 +68,7 @@
%endm
;-------------------------------------------------------------------------------
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define LOCAL_VARS_SIZE_H4 0
%else
%define LOCAL_VARS_SIZE_H4 16*4
@@ -79,7 +79,7 @@
src, sstride, dst, dstride, height, filter
mova m4, [filterq]
packsswb m4, m4
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define k0k1k4k5 m8
%define k2k3k6k7 m9
%define krd m10
@@ -339,7 +339,7 @@
; TODO(Linfeng): Detect cpu type and choose the code with better performance.
%define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1
-%if ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if VPX_ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
%define NUM_GENERAL_REG_USED 9
%else
%define NUM_GENERAL_REG_USED 6
@@ -359,9 +359,9 @@
dec heightd
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define src1q r7
%define sstride6q r8
%define dst_stride dstrideq
@@ -467,7 +467,7 @@
movx [dstq], m0
%else
- ; ARCH_X86_64
+ ; VPX_ARCH_X86_64
movx m0, [srcq ] ;A
movx m1, [srcq + sstrideq ] ;B
@@ -567,7 +567,7 @@
%endif
movx [dstq], m0
-%endif ; ARCH_X86_64
+%endif ; VPX_ARCH_X86_64
.done:
REP_RET
@@ -581,9 +581,9 @@
mova m4, [filterq]
SETUP_LOCAL_VARS
-%if ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
+%if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON
-%if ARCH_X86_64
+%if VPX_ARCH_X86_64
%define src1q r7
%define sstride6q r8
%define dst_stride dstrideq
@@ -654,7 +654,7 @@
REP_RET
%else
- ; ARCH_X86_64
+ ; VPX_ARCH_X86_64
dec heightd
movu m1, [srcq ] ;A
@@ -790,7 +790,7 @@
.done:
REP_RET
-%endif ; ARCH_X86_64
+%endif ; VPX_ARCH_X86_64
%endm
--- a/vpx_ports/system_state.h
+++ b/vpx_ports/system_state.h
@@ -17,11 +17,11 @@
extern "C" {
#endif
-#if (ARCH_X86 || ARCH_X86_64) && HAVE_MMX
+#if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX
extern void vpx_clear_system_state(void);
#else
#define vpx_clear_system_state()
-#endif // (ARCH_X86 || ARCH_X86_64) && HAVE_MMX
+#endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX
#ifdef __cplusplus
} // extern "C"
--- a/vpx_ports/vpx_ports.mk
+++ b/vpx_ports/vpx_ports.mk
@@ -17,29 +17,29 @@
PORTS_SRCS-yes += system_state.h
PORTS_SRCS-yes += vpx_timer.h
-ifeq ($(ARCH_X86),yes)
+ifeq ($(VPX_ARCH_X86),yes)
PORTS_SRCS-$(HAVE_MMX) += emms_mmx.c
endif
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
# Visual Studio x64 does not support the _mm_empty() intrinsic.
PORTS_SRCS-$(HAVE_MMX) += emms_mmx.asm
endif
-ifeq ($(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86_64),yes)
PORTS_SRCS-$(CONFIG_MSVS) += float_control_word.asm
endif
-ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
+ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes)
PORTS_SRCS-yes += x86.h
PORTS_SRCS-yes += x86_abi_support.asm
endif
-PORTS_SRCS-$(ARCH_ARM) += arm_cpudetect.c
-PORTS_SRCS-$(ARCH_ARM) += arm.h
+PORTS_SRCS-$(VPX_ARCH_ARM) += arm_cpudetect.c
+PORTS_SRCS-$(VPX_ARCH_ARM) += arm.h
-PORTS_SRCS-$(ARCH_PPC) += ppc_cpudetect.c
-PORTS_SRCS-$(ARCH_PPC) += ppc.h
+PORTS_SRCS-$(VPX_ARCH_PPC) += ppc_cpudetect.c
+PORTS_SRCS-$(VPX_ARCH_PPC) += ppc.h
-ifeq ($(ARCH_MIPS), yes)
+ifeq ($(VPX_ARCH_MIPS), yes)
PORTS_SRCS-yes += asmdefs_mmi.h
endif
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -43,7 +43,7 @@
} vpx_cpu_t;
#if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__)
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx) \
__asm__ __volatile__("cpuid \n\t" \
: "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \
@@ -59,7 +59,7 @@
#endif
#elif defined(__SUNPRO_C) || \
defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
#define cpuid(func, func2, ax, bx, cx, dx) \
asm volatile( \
"xchg %rsi, %rbx \n\t" \
@@ -79,7 +79,7 @@
: "a"(func), "c"(func2));
#endif
#else /* end __SUNPRO__ */
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
#if defined(_MSC_VER) && _MSC_VER > 1500
#define cpuid(func, func2, a, b, c, d) \
do { \
@@ -253,7 +253,7 @@
asm volatile("rdtsc\n\t" : "=a"(tsc) :);
return tsc;
#else
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
return (unsigned int)__rdtsc();
#else
__asm rdtsc;
@@ -271,7 +271,7 @@
asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi));
return ((uint64_t)hi << 32) | lo;
#else
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
return (uint64_t)__rdtsc();
#else
__asm rdtsc;
@@ -293,7 +293,7 @@
unsigned int ui;
return (unsigned int)__rdtscp(&ui);
#else
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
return (unsigned int)__rdtscp();
#else
__asm rdtscp;
@@ -319,7 +319,7 @@
#elif defined(__SUNPRO_C) || defined(__SUNPRO_CC)
#define x86_pause_hint() asm volatile("pause \n\t")
#else
-#if ARCH_X86_64
+#if VPX_ARCH_X86_64
#define x86_pause_hint() _mm_pause();
#else
#define x86_pause_hint() __asm pause
@@ -344,7 +344,7 @@
asm volatile("fstcw %0\n\t" : "=m"(*&mode) :);
return mode;
}
-#elif ARCH_X86_64
+#elif VPX_ARCH_X86_64
/* No fldcw intrinsics on Windows x64, punt to external asm */
extern void vpx_winx64_fldcw(unsigned short mode);
extern unsigned short vpx_winx64_fstcw(void);
--- a/vpx_util/vpx_atomics.h
+++ b/vpx_util/vpx_atomics.h
@@ -51,16 +51,16 @@
do { \
} while (0)
#else
-#if ARCH_X86 || ARCH_X86_64
+#if VPX_ARCH_X86 || VPX_ARCH_X86_64
// Use a compiler barrier on x86, no runtime penalty.
#define vpx_atomic_memory_barrier() __asm__ __volatile__("" ::: "memory")
-#elif ARCH_ARM
+#elif VPX_ARCH_ARM
#define vpx_atomic_memory_barrier() __asm__ __volatile__("dmb ish" ::: "memory")
-#elif ARCH_MIPS
+#elif VPX_ARCH_MIPS
#define vpx_atomic_memory_barrier() __asm__ __volatile__("sync" ::: "memory")
#else
#error Unsupported architecture!
-#endif // ARCH_X86 || ARCH_X86_64
+#endif // VPX_ARCH_X86 || VPX_ARCH_X86_64
#endif // defined(_MSC_VER)
#endif // atomic builtin availability check