ref: f2ee7a2a430992816e20b9d3a364492abf8fbdb8
parent: d6e9cb0603817437aea15849228255661e19b2b6
author: Henrik Gramner <gramner@twoorioles.com>
date: Fri Sep 28 07:20:35 EDT 2018
Add 'checkasm' asm testing/benchmarking framework. Some of the code was originally written by, or is based on code written by, the following authors, who have agreed to relicense it to 2-clause BSD: Anton Mitrofanov, Diego Biurrun, Janne Grunau, Loren Merritt, Luca Barbato, Martin Storsjö, Michael Niedermayer
--- /dev/null
+++ b/src/cpu.h
@@ -1,0 +1,40 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_CPU_H__
+#define __DAV1D_SRC_CPU_H__
+
+#include "config.h"
+
+#if ARCH_X86
+#include "src/x86/cpu.h"
+#else
+/* No runtime CPU detection is available here: report no flags and make the
+ * mask setter a no-op. Both are function-like macros so that call sites
+ * written as dav1d_get_cpu_flags() expand to a valid expression instead
+ * of `0()`. */
+#define dav1d_get_cpu_flags() 0
+#define dav1d_set_cpu_flags_mask(mask) while (0)
+#endif
+
+#endif /* __DAV1D_SRC_CPU_H__ */
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -32,9 +32,9 @@
void dav1d_cpu_cpuid(uint32_t *info, int leaf);
uint64_t dav1d_cpu_xgetbv(int xcr);
-static enum CpuFlags get_cpu_flags_x86(void) {
+static unsigned get_cpu_flags(void) {
uint32_t info[4], n_ids;
- enum CpuFlags flags = 0;
+ unsigned flags = 0;
dav1d_cpu_cpuid(info, 0);
n_ids = info[0];
@@ -66,13 +66,19 @@
return flags;
}
-enum CpuFlags dav1d_get_cpu_flags_x86(void) {
- static enum CpuFlags flags;
+static unsigned flags_mask = -1; /* all bits set: nothing is masked out until dav1d_set_cpu_flags_mask() is called */
+
+unsigned dav1d_get_cpu_flags(void) { /* returns the detected flags ANDed with flags_mask */
+ static unsigned flags; /* result of get_cpu_flags(), computed on the first call only */
static uint8_t checked = 0;
if (!checked) {
- flags = get_cpu_flags_x86();
+ flags = get_cpu_flags();
checked = 1;
}
- return flags;
+ return flags & flags_mask; /* the mask is applied on every call, it is not cached */
+}
+
+void dav1d_set_cpu_flags_mask(const unsigned mask) { /* replaces the mask that dav1d_get_cpu_flags() ANDs into its result */
+ flags_mask = mask;
}
--- a/src/x86/cpu.h
+++ b/src/x86/cpu.h
@@ -40,6 +40,7 @@
DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8, /* F + CD + BW + DQ + VL */
};
-enum CpuFlags dav1d_get_cpu_flags_x86(void);
+unsigned dav1d_get_cpu_flags(void); /* detected DAV1D_X86_CPU_FLAG_* bits, limited by the current mask */
+void dav1d_set_cpu_flags_mask(unsigned mask); /* sets the mask applied by dav1d_get_cpu_flags() */
#endif /* __DAV1D_SRC_X86_CPU_H__ */
--- a/src/x86/mc_init.c
+++ b/src/x86/mc_init.c
@@ -25,10 +25,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "config.h"
-
+#include "src/cpu.h"
#include "src/mc.h"
-#include "src/x86/cpu.h"
decl_mc_fn(dav1d_put_8tap_regular_avx2);
decl_mc_fn(dav1d_put_8tap_regular_smooth_avx2);
@@ -62,7 +60,7 @@
c->mc[type] = dav1d_put_##name##_##suffix
#define init_mct_fn(type, name, suffix) \
c->mct[type] = dav1d_prep_##name##_##suffix
- const enum CpuFlags flags = dav1d_get_cpu_flags_x86();
+ const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
--- /dev/null
+++ b/tests/checkasm/checkasm.c
@@ -1,0 +1,578 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+
+#include "src/cpu.h"
+
+#include "tests/checkasm/checkasm.h"
+
+#ifdef _WIN32
+#include <windows.h>
+#define COLOR_RED FOREGROUND_RED
+#define COLOR_GREEN FOREGROUND_GREEN
+#define COLOR_YELLOW (FOREGROUND_RED|FOREGROUND_GREEN)
+#else
+#include <unistd.h>
+#define COLOR_RED 1
+#define COLOR_GREEN 2
+#define COLOR_YELLOW 3
+#endif
+
+/* List of tests to invoke. Each entry pairs the name that is matched against
+ * the --test=NAME option with the function that runs the test. Iteration
+ * stops at the first entry whose func is NULL; no tests are registered
+ * in this table yet. */
+static const struct {
+ const char *name;
+ void (*func)(void);
+} tests[] = {
+ { NULL }
+};
+
+/* List of cpu flags to check. name is the heading printed above the results
+ * for that flag, suffix is appended to function names in reports and
+ * benchmark output, and flag is the corresponding feature bit. main() walks
+ * the entries in order until it reaches the zeroed terminator, while
+ * cpu_suffix() scans them from the last real entry backwards. */
+static const struct {
+ const char *name;
+ const char *suffix;
+ unsigned flag;
+} cpus[] = {
+#if ARCH_X86
+ { "SSE", "sse", DAV1D_X86_CPU_FLAG_SSE },
+ { "SSE2", "sse2", DAV1D_X86_CPU_FLAG_SSE2 },
+ { "SSE3", "sse3", DAV1D_X86_CPU_FLAG_SSE3 },
+ { "SSSE3", "ssse3", DAV1D_X86_CPU_FLAG_SSSE3 },
+ { "SSE4.1", "sse4", DAV1D_X86_CPU_FLAG_SSE41 },
+ { "SSE4.2", "sse42", DAV1D_X86_CPU_FLAG_SSE42 },
+ { "AVX", "avx", DAV1D_X86_CPU_FLAG_AVX },
+ { "AVX2", "avx2", DAV1D_X86_CPU_FLAG_AVX2 },
+ { "AVX-512", "avx512", DAV1D_X86_CPU_FLAG_AVX512 },
+#endif
+ { NULL }
+};
+
+typedef struct CheckasmFuncVersion {
+ struct CheckasmFuncVersion *next; /* next registered version of the same function, NULL at the tail */
+ void *func; /* function pointer passed to checkasm_check_func() */
+ int ok; /* set to 1 on registration, cleared by checkasm_fail_func() */
+ unsigned cpu; /* value of state.cpu_flag at registration time */
+ int iterations; /* benchmark runs accumulated by checkasm_update_bench() */
+ uint64_t cycles; /* timer ticks accumulated by checkasm_update_bench() */
+} CheckasmFuncVersion;
+
+/* Binary search tree node. There is one node per function name, ordered by
+ * cmp_func_names(); the tree is kept balanced as a left-leaning red-black
+ * tree by balance_tree(). */
+typedef struct CheckasmFunc {
+ struct CheckasmFunc *child[2]; /* [0] = names comparing lower, [1] = names comparing higher */
+ CheckasmFuncVersion versions; /* first version is embedded, later ones are linked via ->next */
+ uint8_t color; /* 0 = red, 1 = black */
+ char name[1]; /* get_func() over-allocates the node so the whole NUL-terminated name fits */
+} CheckasmFunc;
+
+/* Internal state shared by the test driver and the checkasm_*() entry points.
+ * It has static storage duration and therefore starts out zeroed. */
+static struct {
+ CheckasmFunc *funcs; /* root of the function tree */
+ CheckasmFunc *current_func; /* node selected by the last checkasm_check_func() call */
+ CheckasmFuncVersion *current_func_ver; /* version registered by that call */
+ const char *current_test_name; /* name of the tests[] entry being run */
+ const char *bench_pattern; /* NULL unless --bench was given; function name prefix to benchmark */
+ int bench_pattern_len; /* length of bench_pattern (0 matches every name) */
+ int num_checked; /* functions registered while a cpu flag was active */
+ int num_failed; /* versions reported as failed */
+ int nop_time; /* timing overhead from measure_nop_time() */
+ unsigned cpu_flag; /* flags returned by dav1d_get_cpu_flags() for the current pass */
+ const char *cpu_flag_name; /* heading still to be printed, NULL once printed */
+ const char *test_name; /* value of --test=NAME, NULL to run all tests */
+} state;
+
+/* float compare support code: a union used to read the raw 32-bit
+ * representation of a float */
+typedef union {
+ float f;
+ uint32_t i;
+} intfloat;
+/* Returns the top bit (bit 31) of the raw representation, i.e. the sign bit */
+static int is_negative(const intfloat u) {
+ return u.i >> 31;
+}
+/* Returns 1 if a and b are at most max_ulp representable values apart, else 0 */
+int float_near_ulp(const float a, const float b, const unsigned max_ulp) {
+ intfloat x, y;
+
+ x.f = a;
+ y.f = b;
+
+ if (is_negative(x) != is_negative(y)) { /* different sign bits: raw distance is meaningless */
+ // handle -0.0 == +0.0
+ return a == b;
+ }
+
+ if (llabs((int64_t)x.i - y.i) <= max_ulp) /* widen to 64 bits so the subtraction cannot wrap */
+ return 1;
+
+ return 0;
+}
+/* Returns 1 if float_near_ulp() holds for each of the first len element pairs */
+int float_near_ulp_array(const float *const a, const float *const b,
+ const unsigned max_ulp, const int len)
+{
+ for (int i = 0; i < len; i++)
+ if (!float_near_ulp(a[i], b[i], max_ulp))
+ return 0; /* stop at the first mismatch */
+
+ return 1;
+}
+/* Returns 1 if the absolute difference of a and b is strictly below eps */
+int float_near_abs_eps(const float a, const float b, const float eps) {
+ return fabsf(a - b) < eps;
+}
+/* Returns 1 if float_near_abs_eps() holds for each of the first len element pairs */
+int float_near_abs_eps_array(const float *const a, const float *const b,
+ const float eps, const int len)
+{
+ for (int i = 0; i < len; i++)
+ if (!float_near_abs_eps(a[i], b[i], eps))
+ return 0; /* stop at the first mismatch */
+
+ return 1;
+}
+/* Returns 1 if either the ULP test or the absolute-epsilon test accepts a and b */
+int float_near_abs_eps_ulp(const float a, const float b, const float eps,
+ const unsigned max_ulp)
+{
+ return float_near_ulp(a, b, max_ulp) || float_near_abs_eps(a, b, eps);
+}
+/* Returns 1 if float_near_abs_eps_ulp() holds for each of the first len element pairs */
+int float_near_abs_eps_array_ulp(const float *const a, const float *const b,
+ const float eps, const unsigned max_ulp,
+ const int len)
+{
+ for (int i = 0; i < len; i++)
+ if (!float_near_abs_eps_ulp(a[i], b[i], eps, max_ulp))
+ return 0; /* stop at the first mismatch */
+
+ return 1;
+}
+
+/* Print colored text to stderr if the terminal supports it.
+ * Whether color can be used is determined on the first call and cached in
+ * use_color: on Windows it requires stderr to be a console whose attributes
+ * can be queried; elsewhere it requires TERM to be set to something other
+ * than "dumb" and file descriptor 2 to be a tty. The message itself is
+ * always printed; the color is reset again afterwards. */
+static void color_printf(const int color, const char *const fmt, ...) {
+ static int8_t use_color = -1; /* -1 = not determined yet, 0 = plain, 1 = colored */
+ va_list arg;
+
+#ifdef _WIN32
+ static HANDLE con;
+ static WORD org_attributes; /* console attributes to restore after printing */
+
+ if (use_color < 0) {
+ CONSOLE_SCREEN_BUFFER_INFO con_info;
+ con = GetStdHandle(STD_ERROR_HANDLE);
+ if (con && con != INVALID_HANDLE_VALUE &&
+ GetConsoleScreenBufferInfo(con, &con_info))
+ {
+ org_attributes = con_info.wAttributes;
+ use_color = 1;
+ } else
+ use_color = 0;
+ }
+ if (use_color)
+ SetConsoleTextAttribute(con, (org_attributes & 0xfff0) |
+ (color & 0x0f)); /* replace only the low four attribute bits */
+#else
+ if (use_color < 0) {
+ const char *const term = getenv("TERM");
+ use_color = term && strcmp(term, "dumb") && isatty(2);
+ }
+ if (use_color)
+ fprintf(stderr, "\x1b[%d;3%dm", (color & 0x08) >> 3, color & 0x07); /* bit 3 selects the first escape parameter, bits 0-2 the color digit */
+#endif
+
+ va_start(arg, fmt);
+ vfprintf(stderr, fmt, arg);
+ va_end(arg);
+
+ if (use_color) {
+#ifdef _WIN32
+ SetConsoleTextAttribute(con, org_attributes);
+#else
+ fprintf(stderr, "\x1b[0m");
+#endif
+ }
+}
+
+/* Deallocate a tree: frees the separately allocated versions of the node,
+ * then both subtrees recursively, then the node itself. A NULL argument is
+ * accepted and ignored. */
+static void destroy_func_tree(CheckasmFunc *const f) {
+ if (f) {
+ CheckasmFuncVersion *v = f->versions.next; /* the first version lives inside the node and is freed with it */
+ while (v) {
+ CheckasmFuncVersion *next = v->next; /* fetch the link before v is freed */
+ free(v);
+ v = next;
+ }
+
+ destroy_func_tree(f->child[0]);
+ destroy_func_tree(f->child[1]);
+ free(f);
+ }
+}
+
+/* Allocate a zero-initialized block, clean up and exit on failure.
+ * Never returns NULL: on allocation failure a message is printed, the
+ * function tree is released and the process exits with status 1. */
+static void *checkasm_malloc(const size_t size) {
+ void *const ptr = calloc(1, size);
+ if (!ptr) {
+ fprintf(stderr, "checkasm: malloc failed\n");
+ destroy_func_tree(state.funcs);
+ exit(1);
+ }
+ return ptr;
+}
+
+/* Get the suffix of the specified cpu flag.
+ * The cpus[] table is scanned from its last real entry backwards, so when
+ * several bits are set in cpu the entry listed last wins. Returns "c" when
+ * no table entry matches. */
+static const char *cpu_suffix(const unsigned cpu) {
+ for (int i = sizeof(cpus) / sizeof(*cpus) - 2; i >= 0; i--) /* -2 skips the terminating entry */
+ if (cpu & cpus[i].flag)
+ return cpus[i].suffix;
+
+ return "c";
+}
+
+#ifdef readtime
+static int cmp_nop(const void *a, const void *b) { /* qsort() comparator: ascending order of uint16_t values */
+ return *(const uint16_t*)a - *(const uint16_t*)b;
+}
+
+/* Measure the overhead of the timing code (in decicycles).
+ * Takes 10000 differences of two back-to-back readtime() calls, sorts them
+ * and sums the middle 5000 (indices 2500..7499), discarding the lowest and
+ * highest quarter. Dividing that sum by 500 gives ten times the mean. */
+static int measure_nop_time(void) {
+ uint16_t nops[10000];
+ int i, nop_sum = 0;
+
+ for (i = 0; i < 10000; i++) {
+ uint64_t t = readtime();
+ nops[i] = readtime() - t; /* stored as uint16_t: larger deltas are truncated */
+ }
+
+ qsort(nops, 10000, sizeof(uint16_t), cmp_nop);
+ for (i = 2500; i < 7500; i++)
+ nop_sum += nops[i];
+
+ return nop_sum / 500;
+}
+
+/* Print benchmark results to stdout.
+ * The tree is walked in order (left subtree, node, right subtree), so
+ * functions are listed in cmp_func_names() order. One line is printed per
+ * version that has accumulated benchmark iterations. */
+static void print_benchs(const CheckasmFunc *const f) {
+ if (f) {
+ print_benchs(f->child[0]);
+
+ /* Only print functions with at least one assembly version */
+ if (f->versions.cpu || f->versions.next) {
+ const CheckasmFuncVersion *v = &f->versions;
+ do {
+ if (v->iterations) {
+ int decicycles = (10*v->cycles/v->iterations -
+ state.nop_time) / 4; /* bench_new() times four calls per run */
+ printf("%s_%s: %d.%d\n", f->name, cpu_suffix(v->cpu),
+ decicycles/10, decicycles%10);
+ }
+ } while ((v = v->next));
+ }
+
+ print_benchs(f->child[1]);
+ }
+}
+#endif
+
+#define is_digit(x) ((x) >= '0' && (x) <= '9')
+
+/* ASCIIbetical sort except preserving natural order for numbers.
+ * Returns zero for equal strings, a negative value if a sorts before b and
+ * a positive value otherwise. The strings are compared byte-wise as
+ * unsigned chars; if the first difference is in or directly after a run of
+ * digits and one string has more digits at that point, the string with the
+ * longer digit run sorts later regardless of the differing byte. */
+static int cmp_func_names(const char *a, const char *b) {
+ const char *const start = a;
+ int ascii_diff, digit_diff;
+
+ for (; !(ascii_diff = *(const unsigned char*)a -
+ *(const unsigned char*)b) && *a; a++, b++); /* advance to the first differing byte or the end */
+ for (; is_digit(*a) && is_digit(*b); a++, b++); /* skip the digits both strings still have */
+
+ if (a > start && is_digit(a[-1]) &&
+ (digit_diff = is_digit(*a) - is_digit(*b))) /* nonzero when only one string has a further digit */
+ {
+ return digit_diff;
+ }
+
+ return ascii_diff;
+}
+
+/* Perform a tree rotation in the specified direction and return the new root.
+ * The child on the side opposite to dir is lifted to the root position and
+ * f becomes its child on side dir. The new root takes over the color of f
+ * and f is colored red. */
+static CheckasmFunc *rotate_tree(CheckasmFunc *const f, const int dir) {
+ CheckasmFunc *const r = f->child[dir^1];
+ f->child[dir^1] = r->child[dir]; /* f adopts the subtree that r gives up */
+ r->child[dir] = f;
+ r->color = f->color;
+ f->color = 0;
+ return r;
+}
+
+#define is_red(f) ((f) && !(f)->color)
+
+/* Balance a left-leaning red-black tree at the specified node.
+ * A NULL child counts as black. At most one of the three cases below is
+ * applied per call; *root is updated when a rotation replaces the node. */
+static void balance_tree(CheckasmFunc **root) {
+ CheckasmFunc *const f = *root;
+
+ if (is_red(f->child[0]) && is_red(f->child[1])) {
+ f->color ^= 1; /* color flip: toggle the node, blacken both children */
+ f->child[0]->color = f->child[1]->color = 1;
+ }
+ else if (!is_red(f->child[0]) && is_red(f->child[1]))
+ *root = rotate_tree(f, 0); /* Rotate left */
+ else if (is_red(f->child[0]) && is_red(f->child[0]->child[0]))
+ *root = rotate_tree(f, 1); /* Rotate right */
+}
+
+/* Get a node with the specified name, creating it if it doesn't exist.
+ * root points at the link to search from; it is updated when a node is
+ * inserted there or when rebalancing replaces the subtree root. A newly
+ * created node is zero-initialized, so its versions.func is NULL until the
+ * caller fills it in. */
+static CheckasmFunc *get_func(CheckasmFunc **root, const char *const name) {
+ CheckasmFunc *f = *root;
+
+ if (f) {
+ /* Search the tree for a matching node */
+ int cmp = cmp_func_names(name, f->name);
+ if (cmp) {
+ f = get_func(&f->child[cmp > 0], name);
+
+ /* Rebalance the tree on the way up if a new node was inserted */
+ if (!f->versions.func)
+ balance_tree(root);
+ }
+ } else {
+ /* Allocate and insert a new node into the tree */
+ const int name_length = strlen(name);
+ f = *root = checkasm_malloc(sizeof(CheckasmFunc) + name_length); /* name[1] in the struct already accounts for the NUL */
+ memcpy(f->name, name, name_length + 1);
+ }
+
+ return f;
+}
+
+/* Perform tests and benchmarks for the specified
+ * cpu flag if supported by the host.
+ * The flag is added to the ones enabled so far and installed as the cpu
+ * flags mask. The tests are run when flag is 0 (the initial pass) or when
+ * enabling the flag changed what dav1d_get_cpu_flags() reports. */
+static void check_cpu_flag(const char *const name, unsigned flag) {
+ const unsigned old_cpu_flag = state.cpu_flag;
+
+ flag |= old_cpu_flag; /* flags accumulate from one call to the next */
+ dav1d_set_cpu_flags_mask(flag);
+ state.cpu_flag = dav1d_get_cpu_flags();
+
+ if (!flag || state.cpu_flag != old_cpu_flag) {
+ state.cpu_flag_name = name;
+ for (int i = 0; tests[i].func; i++) {
+ if (state.test_name && strcmp(tests[i].name, state.test_name))
+ continue; /* filtered out by --test=NAME */
+ state.current_test_name = tests[i].name;
+ tests[i].func();
+ }
+ }
+}
+
+/* Print the name of the current CPU flag, but only do it once.
+ * The pending name is cleared after printing, so further calls do nothing
+ * until check_cpu_flag() sets a new one. */
+static void print_cpu_name(void) {
+ if (state.cpu_flag_name) {
+ color_printf(COLOR_YELLOW, "%s:\n", state.cpu_flag_name);
+ state.cpu_flag_name = NULL;
+ }
+}
+
+int main(int argc, char *argv[]) { /* returns 0 when all tests passed, 1 on failure or unsupported --bench */
+#ifdef readtime
+ unsigned int seed = readtime();
+#else
+ unsigned int seed = time(NULL);
+#endif
+ int ret = 0;
+
+ /* NOTE(review): this early exit is disabled, presumably because tests[]
+ * is still empty in this revision -- confirm before re-enabling:
+ if (!tests[0].func || !cpus[0].flag) {
+ fprintf(stderr, "checkasm: no tests to perform\n");
+ return 0;
+ }*/
+
+ while (argc > 1) { /* consume the arguments one at a time from the front */
+ if (!strncmp(argv[1], "--bench", 7)) {
+#ifndef readtime
+ fprintf(stderr,
+ "checkasm: --bench is not supported on your system\n");
+ return 1;
+#endif
+ if (argv[1][7] == '=') {
+ state.bench_pattern = argv[1] + 8; /* --bench=PREFIX: only matching names are benchmarked */
+ state.bench_pattern_len = strlen(state.bench_pattern);
+ } else
+ state.bench_pattern = ""; /* empty prefix matches every function */
+ } else if (!strncmp(argv[1], "--test=", 7)) {
+ state.test_name = argv[1] + 7;
+ } else {
+ seed = strtoul(argv[1], NULL, 10); /* any other argument is taken as the random seed */
+ }
+
+ argc--;
+ argv++;
+ }
+
+ fprintf(stderr, "checkasm: using random seed %u\n", seed);
+ srand(seed);
+
+ check_cpu_flag(NULL, 0); /* initial pass with no cpu flags enabled */
+ for (int i = 0; cpus[i].flag; i++)
+ check_cpu_flag(cpus[i].name, cpus[i].flag);
+
+ if (state.num_failed) {
+ fprintf(stderr, "checkasm: %d of %d tests have failed\n",
+ state.num_failed, state.num_checked);
+ ret = 1;
+ } else {
+ fprintf(stderr, "checkasm: all %d tests passed\n", state.num_checked);
+#ifdef readtime
+ if (state.bench_pattern) { /* benchmark results are only printed when nothing failed */
+ state.nop_time = measure_nop_time();
+ printf("nop: %d.%d\n", state.nop_time/10, state.nop_time%10);
+ print_benchs(state.funcs);
+ }
+#endif
+ }
+
+ destroy_func_tree(state.funcs);
+ return ret;
+}
+
+/* Decide whether or not the specified function needs to be tested and
+ * allocate/initialize data structures if needed. Returns a pointer to a
+ * reference function if the function should be tested, otherwise NULL.
+ *
+ * name is a printf-style format for the function name. NULL is returned when
+ * func is NULL, when the formatted name is empty or does not fit the
+ * 256-byte buffer, or when this exact function pointer has already been
+ * registered under that name. The reference is the last previously
+ * registered version that is still marked ok, or func itself if there is
+ * none. */
+void *checkasm_check_func(void *const func, const char *const name, ...) {
+ char name_buf[256];
+ va_list arg;
+
+ va_start(arg, name);
+ const int name_length = vsnprintf(name_buf, sizeof(name_buf), name, arg);
+ va_end(arg);
+
+ if (!func || name_length <= 0 || name_length >= sizeof(name_buf))
+ return NULL;
+
+ state.current_func = get_func(&state.funcs, name_buf);
+ state.funcs->color = 1; /* the root of the tree is always kept black */
+ CheckasmFuncVersion *v = &state.current_func->versions;
+ void *ref = func;
+
+ if (v->func) { /* the name is already known: walk its version list */
+ CheckasmFuncVersion *prev;
+ do {
+ /* Only test functions that haven't already been tested */
+ if (v->func == func)
+ return NULL;
+
+ if (v->ok)
+ ref = v->func;
+
+ prev = v;
+ } while ((v = v->next));
+
+ v = prev->next = checkasm_malloc(sizeof(CheckasmFuncVersion)); /* append a new version at the tail */
+ }
+
+ v->func = func;
+ v->ok = 1;
+ v->cpu = state.cpu_flag;
+ state.current_func_ver = v;
+
+ if (state.cpu_flag) /* registrations made with no cpu flag enabled are not counted */
+ state.num_checked++;
+
+ return ref;
+}
+
+/* Decide whether or not the current function needs to be benchmarked.
+ * Returns nonzero only if no test has failed so far, benchmarking was
+ * requested and the current function name starts with the bench pattern. */
+int checkasm_bench_func(void) {
+ return !state.num_failed && state.bench_pattern &&
+ !strncmp(state.current_func->name, state.bench_pattern,
+ state.bench_pattern_len);
+}
+
+/* Indicate that the current test has failed.
+ * msg is a printf-style format printed in parentheses after the function
+ * name. Only the first failure of a version is reported and counted, and
+ * versions registered with no cpu flag enabled are ignored. */
+void checkasm_fail_func(const char *const msg, ...) {
+ if (state.current_func_ver->cpu && state.current_func_ver->ok) {
+ va_list arg;
+
+ print_cpu_name();
+ fprintf(stderr, " %s_%s (", state.current_func->name,
+ cpu_suffix(state.current_func_ver->cpu));
+ va_start(arg, msg);
+ vfprintf(stderr, msg, arg);
+ va_end(arg);
+ fprintf(stderr, ")\n");
+
+ state.current_func_ver->ok = 0; /* also stops this version from being used as a reference */
+ state.num_failed++;
+ }
+}
+
+/* Update benchmark results of the current function by adding the given
+ * iteration count and cycle total to the current version's accumulators */
+void checkasm_update_bench(const int iterations, const uint64_t cycles) {
+ state.current_func_ver->iterations += iterations;
+ state.current_func_ver->cycles += cycles;
+}
+
+/* Print the outcome of all tests performed since
+ * the last time this function was called.
+ * name is a printf-style format for the report label. A line is printed only
+ * if functions were checked since the previous report; it shows FAILED if
+ * the failure count grew in that interval and OK otherwise. While no cpu
+ * flag is enabled nothing is printed; the call only records the longest
+ * label so that later lines can be aligned. */
+void checkasm_report(const char *const name, ...) {
+ static int prev_checked, prev_failed, max_length;
+
+ if (state.num_checked > prev_checked) {
+ int pad_length = max_length + 4;
+ va_list arg;
+
+ print_cpu_name();
+ pad_length -= fprintf(stderr, " - %s.", state.current_test_name);
+ va_start(arg, name);
+ pad_length -= vfprintf(stderr, name, arg);
+ va_end(arg);
+ fprintf(stderr, "%*c", imax(pad_length, 0) + 2, '['); /* '[' right-aligned in the remaining width */
+
+ if (state.num_failed == prev_failed)
+ color_printf(COLOR_GREEN, "OK");
+ else
+ color_printf(COLOR_RED, "FAILED");
+ fprintf(stderr, "]\n");
+
+ prev_checked = state.num_checked;
+ prev_failed = state.num_failed;
+ } else if (!state.cpu_flag) {
+ /* Calculate the amount of padding required
+ * to make the output vertically aligned */
+ int length = strlen(state.current_test_name);
+ va_list arg;
+
+ va_start(arg, name);
+ length += vsnprintf(NULL, 0, name, arg); /* size 0: only the formatted length is computed */
+ va_end(arg);
+
+ if (length > max_length)
+ max_length = length;
+ }
+}
--- /dev/null
+++ b/tests/checkasm/checkasm.h
@@ -1,0 +1,157 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_TESTS_CHECKASM_CHECKASM_H
+#define __DAV1D_TESTS_CHECKASM_CHECKASM_H
+
+#include "config.h"
+
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "include/common/attributes.h"
+#include "include/common/intops.h"
+
+void *checkasm_check_func(void *func, const char *name, ...); /* returns the reference function, or NULL if func needs no testing */
+int checkasm_bench_func(void); /* nonzero if the current function should be benchmarked */
+void checkasm_fail_func(const char *msg, ...); /* marks the current function version as failed */
+void checkasm_update_bench(int iterations, uint64_t cycles); /* accumulates benchmark results */
+void checkasm_report(const char *name, ...); /* prints the outcome since the previous report */
+
+/* float compare utilities: each returns 1 if the values (or all len array
+ * elements) are considered near each other and 0 otherwise */
+int float_near_ulp(float a, float b, unsigned max_ulp);
+int float_near_abs_eps(float a, float b, float eps);
+int float_near_abs_eps_ulp(float a, float b, float eps, unsigned max_ulp);
+int float_near_ulp_array(const float *a, const float *b, unsigned max_ulp,
+ int len);
+int float_near_abs_eps_array(const float *a, const float *b, float eps,
+ int len);
+int float_near_abs_eps_array_ulp(const float *a, const float *b, float eps,
+ unsigned max_ulp, int len);
+
+static void *func_ref, *func_new; /* set by check_func(): reference function and function under test */
+
+#define BENCH_RUNS (1 << 12) /* Trade-off between accuracy and speed */
+
+/* Decide whether or not the specified function needs to be tested.
+ * Stores func in func_new and the value returned by checkasm_check_func()
+ * in func_ref; the expression evaluates to func_ref, which is NULL when the
+ * function should be skipped. */
+#define check_func(func, ...)\
+ (func_ref = checkasm_check_func((func_new = func), __VA_ARGS__))
+
+/* Declare the function prototype. The first argument is the return value,
+ * the remaining arguments are the function parameters. Naming parameters
+ * is optional. This defines the func_type typedef used by call_ref() and
+ * whatever declare_new() expands to for the current platform. */
+#define declare_func(ret, ...)\
+ declare_new(ret, __VA_ARGS__) typedef ret func_type(__VA_ARGS__)
+
+/* Indicate that the current test has failed, recording the source file and
+ * line of the call as the failure message */
+#define fail() checkasm_fail_func("%s:%d", __FILE__, __LINE__)
+
+/* Print the test outcome */
+#define report checkasm_report
+
+/* Call the reference function through the declared func_type */
+#define call_ref(...) ((func_type *)func_ref)(__VA_ARGS__)
+
+/* Platform-specific timer and call wrappers. The checked-call machinery is
+ * only available for x86 builds with assembly enabled; every other
+ * configuration (including HAVE_ASM == 0) falls back to calling the new
+ * function directly so that declare_new() and call_new() are always
+ * defined. */
+#if HAVE_ASM && ARCH_X86
+#ifdef _MSC_VER
+#include <intrin.h>
+#define readtime() (_mm_lfence(), __rdtsc())
+#else
+/* Executes lfence followed by rdtsc and returns the 64-bit counter value
+ * assembled from edx:eax */
+static inline uint64_t readtime(void) {
+    uint32_t eax, edx;
+    __asm__ __volatile__("lfence\nrdtsc" : "=a"(eax), "=d"(edx));
+    return (((uint64_t)edx) << 32) | eax;
+}
+#define readtime readtime
+#endif
+
+/* Verifies that clobbered callee-saved registers
+ * are properly saved and restored */
+void checkasm_checked_call(void *func, ...);
+
+#if ARCH_X86_64
+/* Evil hack: detect incorrect assumptions that 32-bit ints are zero-extended
+ * to 64-bit. This is done by clobbering the stack with junk around the stack
+ * pointer and calling the assembly function through checked_call() with added
+ * dummy arguments which forces all real arguments to be passed on the stack
+ * and not in registers. For 32-bit arguments the upper half of the 64-bit
+ * register locations on the stack will now contain junk which will cause
+ * misbehaving functions to either produce incorrect output or segfault. Note
+ * that even though this works extremely well in practice, it's technically
+ * not guaranteed and false negatives are theoretically possible, but there
+ * can never be any false positives. */
+void checkasm_stack_clobber(uint64_t clobber, ...);
+#define declare_new(ret, ...)\
+    ret (*checked_call)(void *, int, int, int, int, int, __VA_ARGS__) =\
+    (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...)\
+    (checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+                            CLOB, CLOB, CLOB, CLOB, CLOB, CLOB, CLOB),\
+     checked_call(func_new, 0, 0, 0, 0, 0, __VA_ARGS__))
+#elif ARCH_X86_32
+#define declare_new(ret, ...)\
+    ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
+#define call_new(...) checked_call(func_new, __VA_ARGS__)
+#endif
+#else /* !HAVE_ASM || !ARCH_X86 */
+#define declare_new(ret, ...)
+/* Call the function */
+#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
+#endif
+
+/* Benchmark the function.
+ * Does nothing unless checkasm_bench_func() says the current function should
+ * be benchmarked. Otherwise BENCH_RUNS timed runs of four calls each are
+ * made. The first run is always discarded, and a later run is only
+ * accumulated if it took at most four times the average of the runs kept so
+ * far. The number of kept runs and their total time are passed to
+ * checkasm_update_bench(). Without a readtime() implementation the macro
+ * expands to an empty loop. */
+#ifdef readtime
+#define bench_new(...)\
+ do {\
+ if (checkasm_bench_func()) {\
+ func_type *tfunc = func_new;\
+ uint64_t tsum = 0;\
+ int ti, tcount = 0;\
+ for (ti = 0; ti < BENCH_RUNS; ti++) {\
+ uint64_t t = readtime();\
+ tfunc(__VA_ARGS__);\
+ tfunc(__VA_ARGS__);\
+ tfunc(__VA_ARGS__);\
+ tfunc(__VA_ARGS__);\
+ t = readtime() - t;\
+ if (t*tcount <= tsum*4 && ti > 0) {\
+ tsum += t;\
+ tcount++;\
+ }\
+ }\
+ checkasm_update_bench(tcount, tsum);\
+ }\
+ } while (0)
+#else
+#define bench_new(...) while (0)
+#endif
+
+#endif /* __DAV1D_TESTS_CHECKASM_CHECKASM_H */
--- /dev/null
+++ b/tests/checkasm/x86/checkasm.asm
@@ -1,0 +1,212 @@
+; Copyright © 2018, VideoLAN and dav1d authors
+; Copyright © 2018, Two Orioles, LLC
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions are met:
+;
+; 1. Redistributions of source code must retain the above copyright notice, this
+; list of conditions and the following disclaimer.
+;
+; 2. Redistributions in binary form must reproduce the above copyright notice,
+; this list of conditions and the following disclaimer in the documentation
+; and/or other materials provided with the distribution.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+; ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+; DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+; ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+; ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+%define private_prefix checkasm
+%include "config.asm"
+%include "ext/x86/x86inc.asm"
+
+SECTION_RODATA
+
+error_message: db "failed to preserve register", 0 ; passed to fail_func by checked_call
+
+%if ARCH_X86_64
+; just random numbers to reduce the chance of incidental match
+ALIGN 16
+x6: dq 0x1a1b2550a612b48c,0x79445c159ce79064 ; x6-x15: 128-bit patterns loaded into m6-m15 on WIN64
+x7: dq 0x2eed899d5a28ddcd,0x86b2536fcd8cf636
+x8: dq 0xb0856806085e7943,0x3f2bf84fc0fcca4e
+x9: dq 0xacbd382dcf5b8de2,0xd229e1f5b281303f
+x10: dq 0x71aeaff20b095fd9,0xab63e2e11fa38ed9
+x11: dq 0x89b0c0765892729a,0x77d410d5c42c882d
+x12: dq 0xc45ea11a955d8dd5,0x24b3c1d2a024048b
+x13: dq 0x2e8ec680de14b47c,0xdd7b8919edd42786
+x14: dq 0x135ce6888fa02cbf,0x11e53e2b2ac655ef
+x15: dq 0x011ff554472a7a10,0x6de8f4c914c334d5
+n7: dq 0x21f86d66c8ca00ce ; n7-n14: 64-bit patterns loaded into r7-r14 (only r9-r14 on UNIX64)
+n8: dq 0x75b6ba21077c48ad
+n9: dq 0xed56bb2dcb3c7736
+n10: dq 0x8bda43d3fd1a7e06
+n11: dq 0xb64a9c9e5d318408
+n12: dq 0xdf9a54b303f1d3a3
+n13: dq 0x4a75479abd64e097
+n14: dq 0x249214109d5d1c88
+%endif
+
+SECTION .text
+
+cextern fail_func
+
+; max number of args used by any asm function.
+; (max_args % 4) must equal 3 for stack alignment
+%define max_args 15
+
+%if ARCH_X86_64
+
+;-----------------------------------------------------------------------------
+; void checkasm_stack_clobber(uint64_t clobber, ...)
+;-----------------------------------------------------------------------------
+cglobal stack_clobber, 1,2
+ ; Clobber the stack with junk below the stack pointer
+ %define argsize (max_args+6)*8
+ SUB rsp, argsize ; temporarily move rsp down over the region to fill
+ mov r1, argsize-8 ; byte offset of the highest qword in the region
+.loop:
+ mov [rsp+r1], r0 ; r0 = first argument (the clobber value)
+ sub r1, 8
+ jge .loop ; continue down to and including offset 0
+ ADD rsp, argsize
+ RET
+
+%if WIN64
+ %assign free_regs 7 ; r7-r14 are filled with check values
+ DECLARE_REG_TMP 4
+%else
+ %assign free_regs 9 ; r9-r14 are filled with check values
+ DECLARE_REG_TMP 7
+%endif
+
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;-----------------------------------------------------------------------------
+INIT_XMM
+cglobal checked_call, 2,15,16,max_args*8+8
+ mov t0, r0 ; keep the function pointer while r0 is reloaded with an argument
+
+ ; All arguments have been pushed on the stack instead of registers in
+ ; order to test for incorrect assumptions that 32-bit ints are
+ ; zero-extended to 64-bit.
+ mov r0, r6mp
+ mov r1, r7mp
+ mov r2, r8mp
+ mov r3, r9mp
+%if UNIX64
+ mov r4, r10mp
+ mov r5, r11mp
+ %assign i 6
+ %rep max_args-6 ; copy the remaining arguments to the callee's stack area
+ mov r9, [rsp+stack_offset+(i+1)*8]
+ mov [rsp+(i-6)*8], r9
+ %assign i i+1
+ %endrep
+%else ; WIN64
+ %assign i 4
+ %rep max_args-4 ; copy the remaining arguments to the callee's stack area
+ mov r9, [rsp+stack_offset+(i+7)*8]
+ mov [rsp+i*8], r9
+ %assign i i+1
+ %endrep
+
+ ; Move possible floating-point arguments to the correct registers
+ movq m0, r0
+ movq m1, r1
+ movq m2, r2
+ movq m3, r3
+
+ %assign i 6
+ %rep 16-6 ; load the check patterns x6-x15 into m6-m15
+ mova m %+ i, [x %+ i]
+ %assign i i+1
+ %endrep
+%endif
+
+%assign i 14
+%rep 15-free_regs ; load the check patterns into r14 down to r<free_regs>
+ mov r %+ i, [n %+ i]
+ %assign i i-1
+%endrep
+ call t0
+%assign i 14
+%rep 15-free_regs ; any register that changed leaves nonzero bits in r14
+ xor r %+ i, [n %+ i]
+ or r14, r %+ i
+ %assign i i-1
+%endrep
+
+%if WIN64
+ %assign i 6
+ %rep 16-6 ; same check for m6-m15, accumulated in m6
+ pxor m %+ i, [x %+ i]
+ por m6, m %+ i
+ %assign i i+1
+ %endrep
+ packsswb m6, m6
+ movq r5, m6
+ or r14, r5 ; merge the vector result; this OR sets the flags tested below
+%endif
+
+ ; Call fail_func() with a descriptive message to mark it as a failure
+ ; if the called function didn't preserve all callee-saved registers.
+ ; Save the return value located in rdx:rax first to prevent clobbering.
+ jz .ok ; zero flag comes from the last "or r14, ..." above
+ mov r9, rax
+ mov r10, rdx
+ lea r0, [error_message]
+ xor eax, eax ; NOTE(review): presumably the vector-register count for the variadic call -- confirm
+ call fail_func
+ mov rdx, r10
+ mov rax, r9
+.ok:
+ RET
+
+%else
+
+; just random numbers to reduce the chance of incidental match
+%define n3 dword 0x6549315c
+%define n4 dword 0xe02f3e23
+%define n5 dword 0xb78d0d1d
+%define n6 dword 0x33627ba7
+
+;-----------------------------------------------------------------------------
+; void checkasm_checked_call(void *func, ...)
+;-----------------------------------------------------------------------------
+cglobal checked_call, 1,7
+ mov r3, n3 ; fill r3-r6 with the check values before the call
+ mov r4, n4
+ mov r5, n5
+ mov r6, n6
+%rep max_args ; push a copy of max_args argument dwords for the callee
+ PUSH dword [esp+20+max_args*4]
+%endrep
+ call r0
+ xor r3, n3 ; a register that still holds its check value becomes zero
+ xor r4, n4
+ xor r5, n5
+ xor r6, n6
+ or r3, r4
+ or r5, r6
+ or r3, r5 ; zero only if all four registers were preserved
+ jz .ok
+ mov r3, eax ; keep the return value in edx:eax across the fail_func call
+ mov r4, edx
+ lea r0, [error_message]
+ mov [esp], r0 ; message pointer becomes the first stack argument
+ call fail_func
+ mov edx, r4
+ mov eax, r3
+.ok:
+ add esp, max_args*4 ; drop the pushed argument copies
+ RET
+
+%endif ; ARCH_X86_64