ref: 430967a6277a08d00e0128556f9ca6af28442186
parent: a47212259e2c6ca44b2ec502099ff33247684b52
author: Victorien Le Couviour--Tuffet <victorien@videolan.org>
date: Tue Oct 29 12:19:11 EDT 2019
checkasm: x86: ensure all SIMD lanes are turned on at all times. YMM and ZMM registers on x86 are turned off to save power when they haven't been used for some period of time. When they are used there will be a "warmup" period during which performance will be reduced and inconsistent, which is problematic when trying to benchmark individual functions. Periodically issue "dummy" instructions that use those registers to prevent them from being powered down. The end result is more consistent benchmark results. Credits to Henrik Gramner's commit 1878c7f2af0a9c73e291488209109782c428cfcf from x264.
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -146,6 +146,9 @@
int bench_c;
int verbose;
int function_listing;
+#if ARCH_X86_64
+ void (*simd_warmup)(void);
+#endif
} state;
/* float compare support code */
@@ -565,14 +568,26 @@
fprintf(stderr, "checkasm: using random seed %u\n", state.seed);
+ dav1d_init_cpu();
+#if ARCH_X86_64
+ void checkasm_warmup_avx2(void);
+ void checkasm_warmup_avx512(void);
+ unsigned cpu_flags = dav1d_get_cpu_flags();
+ if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX512ICL)
+ state.simd_warmup = checkasm_warmup_avx512;
+ else if (cpu_flags & DAV1D_X86_CPU_FLAG_AVX2)
+ state.simd_warmup = checkasm_warmup_avx2;
+ else
+ state.simd_warmup = NULL;
+ checkasm_simd_warmup();
+#endif
check_cpu_flag(NULL, 0);
+
if (state.function_listing) {
print_functions(state.funcs);
} else {
- dav1d_init_cpu();
for (int i = 0; cpus[i].flag; i++)
check_cpu_flag(cpus[i].name, cpus[i].flag);
-
if (!state.num_checked) {
fprintf(stderr, "checkasm: no tests to perform\n");
} else if (state.num_failed) {
@@ -771,3 +786,11 @@
DEF_CHECKASM_CHECK_FUNC(uint16_t, "%04x")
DEF_CHECKASM_CHECK_FUNC(int16_t, "%6d")
DEF_CHECKASM_CHECK_FUNC(int32_t, "%9d")
+
+#if ARCH_X86_64 /* same guard as the state.simd_warmup member this function reads */
+void checkasm_simd_warmup(void) /* run the warmup routine chosen from the CPU flags, if any */
+{
+ if (state.simd_warmup) /* NULL when neither the AVX-512 ICL nor the AVX2 flag was set */
+ state.simd_warmup();
+}
+#endif
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -193,6 +193,13 @@
* not guaranteed and false negatives is theoretically possible, but there
* can never be any false positives. */
void checkasm_stack_clobber(uint64_t clobber, ...);
+/* YMM and ZMM registers on x86 are turned off to save power when they haven't
+ * been used for some period of time. When they are used there will be a
+ * "warmup" period during which performance will be reduced and inconsistent
+ * which is problematic when trying to benchmark individual functions. We can
+ * work around this by periodically issuing "dummy" instructions that use
+ * those registers to keep them powered on. */
+void checkasm_simd_warmup(void);
#define declare_new(ret, ...)\
ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) =\
(void *)checkasm_checked_call;
@@ -199,6 +206,7 @@
#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
#define call_new(...)\
(checkasm_set_signal_handler_state(1),\
+ checkasm_simd_warmup(),\
checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB),\
--- a/tests/checkasm/x86/checkasm.asm
+++ b/tests/checkasm/x86/checkasm.asm
@@ -170,6 +170,19 @@
.ok:
RET
+; trigger a warmup of vector units; expanded below once for YMM (avx2) and once for ZMM (avx512)
+%macro WARMUP 0 ; takes no macro arguments
+cglobal warmup, 0, 0 ; NOTE(review): presumably exported as checkasm_warmup_<cpu>, matching the C-side declarations - confirm
+ xorps m0, m0 ; xor of a register with itself: m0 becomes all zero bits
+ mulps m0, m0 ; dummy multiply on m0 (presumably ymm0/zmm0 under the active INIT_* mode); result is not used
+ RET
+%endmacro
+
+INIT_YMM avx2
+WARMUP
+INIT_ZMM avx512
+WARMUP
+
%else
; just random numbers to reduce the chance of incidental match