ref: 35e777f3780327ea055339408bf7f9c1313254ce
parent: e72c2e45748a9f80223d57333772419f00fc4e7e
author: Janne Grunau <janne-vlc@jannau.net>
date: Sat Sep 29 09:57:29 EDT 2018
build: add support for arm/aarch64 asm and integrate checkasm
--- a/meson.build
+++ b/meson.build
@@ -59,7 +59,9 @@
# ASM option
is_asm_enabled = (get_option('build_asm') == true and
- host_machine.cpu_family().startswith('x86'))
+ (host_machine.cpu_family().startswith('x86') or
+ host_machine.cpu_family() == 'aarch64' or
+ host_machine.cpu_family().startswith('arm'))) # arch tests grouped: 'and' binds tighter than 'or'
cdata.set10('HAVE_ASM', is_asm_enabled)
@@ -183,6 +185,21 @@
endif
endif
+cdata.set10('ARCH_AARCH64', host_machine.cpu_family() == 'aarch64')
+cdata.set10('ARCH_ARM', host_machine.cpu_family().startswith('arm'))
+if (is_asm_enabled and
+ (host_machine.cpu_family() == 'aarch64' or
+ host_machine.cpu_family().startswith('arm')))
+
+ as_func_code = '''__asm__ (
+".func meson_test\n"
+".endfunc\n"
+);
+'''
+ have_as_func = cc.compiles(as_func_code) # probes both .func and .endfunc, one directive per line
+ cdata.set10('HAVE_AS_FUNC', have_as_func)
+endif
+
if host_machine.cpu_family().startswith('x86')
cdata.set10('ARCH_X86', true)
if host_machine.cpu_family() == 'x86_64'
@@ -205,6 +222,7 @@
endif
if cc.symbols_have_underscore_prefix()
+ cdata.set10('PREFIX', true)
cdata_asm.set10('PREFIX', true)
endif
@@ -216,7 +234,7 @@
#
# ASM specific stuff
#
-if is_asm_enabled
+if is_asm_enabled and host_machine.cpu_family().startswith('x86')
# Generate config.asm
config_asm_target = configure_file(output: 'config.asm', output_format: 'nasm', configuration: cdata_asm)
--- /dev/null
+++ b/src/arm/32/util.S
@@ -1,0 +1,50 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#ifndef __DAV1D_SRC_ARM_32_UTIL_S__
+#define __DAV1D_SRC_ARM_32_UTIL_S__
+
+#include "config.h"
+#include "src/arm/asm.S"
+
+.macro movrel rd, val /* load the address of val into rd */
+#if defined(PIC) /* PC-relative: offset is stored in a literal word and added to pc */
+ ldr \rd, 1f
+ b 2f
+1:
+@ FIXME: thumb
+ .word \val - (2f + 8)
+2:
+ add \rd, \rd, pc
+#else /* absolute address built from two 16-bit halves */
+ movw \rd, #:lower16:\val
+ movt \rd, #:upper16:\val
+#endif
+.endm
+
+#endif /* __DAV1D_SRC_ARM_32_UTIL_S__ */
--- /dev/null
+++ b/src/arm/64/util.S
@@ -1,0 +1,62 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#ifndef __DAV1D_SRC_ARM_64_UTIL_S__
+#define __DAV1D_SRC_ARM_64_UTIL_S__
+
+#include "config.h"
+#include "src/arm/asm.S"
+
+.macro movrel rd, val, offset=0 /* load the address of val plus offset into rd */
+#if defined(PIC) && defined(__APPLE__) /* @PAGE / @PAGEOFF relocation syntax */
+ .if \offset < 0
+ adrp \rd, \val@PAGE
+ add \rd, \rd, \val@PAGEOFF
+ sub \rd, \rd, -(\offset)
+ .else
+ adrp \rd, \val+(\offset)@PAGE
+ add \rd, \rd, \val+(\offset)@PAGEOFF
+ .endif
+#elif defined(PIC) && defined(_WIN32) /* negative offsets are applied with a separate sub */
+ .if \offset < 0
+ adrp \rd, \val
+ add \rd, \rd, :lo12:\val
+ sub \rd, \rd, -(\offset)
+ .else
+ adrp \rd, \val+(\offset)
+ add \rd, \rd, :lo12:\val+(\offset)
+ .endif
+#elif defined(PIC) /* other PIC targets: offset folded into the adrp/add pair */
+ adrp \rd, \val+(\offset)
+ add \rd, \rd, :lo12:\val+(\offset)
+#else /* non-PIC: address loaded with the ldr = pseudo-instruction */
+ ldr \rd, =\val+\offset
+#endif
+.endm
+
+#endif /* __DAV1D_SRC_ARM_64_UTIL_S__ */
--- /dev/null
+++ b/src/arm/asm.S
@@ -1,0 +1,94 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_ARM_ASM_S__
+#define __DAV1D_SRC_ARM_ASM_S__
+
+#include "config.h"
+
+#ifndef PRIVATE_PREFIX
+#define PRIVATE_PREFIX dav1d_
+#endif
+
+#define PASTE(a,b) a ## b
+#define CONCAT(a,b) PASTE(a,b)
+
+#ifdef PREFIX
+#define EXTERN CONCAT(_,PRIVATE_PREFIX)
+#else
+#define EXTERN PRIVATE_PREFIX
+#endif
+
+.macro function name, export=0, align=2 /* open a function in .text and define the endfunc macro that closes it */
+ .macro endfunc
+#ifdef __ELF__
+ .size \name, . - \name
+#endif
+#if HAVE_AS_FUNC
+ .endfunc
+#endif
+ .purgem endfunc /* remove this endfunc so the next function can define its own */
+ .endm
+ .text
+ .align \align
+ .if \export
+ .global EXTERN\name /* exported symbol carries the EXTERN prefix defined earlier in this file */
+#ifdef __ELF__
+ .type EXTERN\name, %function
+#endif
+#if HAVE_AS_FUNC
+ .func EXTERN\name
+#endif
+EXTERN\name:
+ .else
+#ifdef __ELF__
+ .type \name, %function
+#endif
+#if HAVE_AS_FUNC
+ .func \name
+#endif
+ .endif
+\name: /* the unprefixed label is emitted for exported and local functions alike */
+.endm
+
+.macro const name, align=2 /* open a data object and define the endconst macro that closes it */
+ .macro endconst
+#ifdef __ELF__
+ .size \name, . - \name
+#endif
+ .purgem endconst /* remove this endconst so the next const can define its own */
+ .endm
+#if !defined(__MACH__) /* .rodata section unless __MACH__ is defined, then .const_data */
+ .section .rodata
+#else
+ .const_data
+#endif
+ .align \align
+\name:
+.endm
+
+#endif /* __DAV1D_SRC_ARM_ASM_S__ */
--- /dev/null
+++ b/src/arm/cpu.c
@@ -1,0 +1,32 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/arm/cpu.h"
+
+unsigned dav1d_get_cpu_flags_arm(void) {
+ return DAV1D_ARM_CPU_FLAG_NEON; /* NEON is reported unconditionally; no runtime detection is done here */
+}
--- /dev/null
+++ b/src/arm/cpu.h
@@ -1,0 +1,37 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DAV1D_SRC_ARM_CPU_H__
+#define __DAV1D_SRC_ARM_CPU_H__
+
+enum CpuFlags { /* bit flags for ARM CPU features */
+ DAV1D_ARM_CPU_FLAG_NEON = 1 << 0, /* the only flag defined so far */
+};
+
+unsigned dav1d_get_cpu_flags_arm(void); /* returns a bitmask of enum CpuFlags values */
+
+#endif /* __DAV1D_SRC_ARM_CPU_H__ */
--- /dev/null
+++ b/src/cpu.c
@@ -1,0 +1,54 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include "config.h"
+#include "src/cpu.h"
+
+static unsigned flags_mask = -1; /* all bits set: no CPU flag is masked out by default */
+
+unsigned dav1d_get_cpu_flags(void) { /* detected CPU flags, filtered by the current mask */
+ static unsigned flags;
+ static uint8_t checked = 0; /* set once the flags have been queried */
+
+ if (!checked) {
+#if (ARCH_AARCH64 || ARCH_ARM) && HAVE_ASM /* arm/cpu.c is only built when asm is enabled */
+ flags = dav1d_get_cpu_flags_arm();
+#elif ARCH_X86 && HAVE_ASM
+ flags = dav1d_get_cpu_flags_x86();
+#else
+ flags = 0; /* no asm for this build: report no CPU features */
+#endif
+ checked = 1;
+ }
+ return flags & flags_mask; /* flags_mask is changed via dav1d_set_cpu_flags_mask() */
+}
+
+void dav1d_set_cpu_flags_mask(const unsigned mask) { /* restrict what dav1d_get_cpu_flags() reports */
+ flags_mask = mask; /* ANDed with the detected flags on every query */
+}
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -30,11 +30,13 @@
#include "config.h"
-#if ARCH_X86
+#if ARCH_AARCH64 || ARCH_ARM
+#include "src/arm/cpu.h"
+#elif ARCH_X86
#include "src/x86/cpu.h"
-#else
-#define dav1d_get_cpu_flags 0
-#define dav1d_set_cpu_flags_mask(mask) while (0)
#endif
+
+unsigned dav1d_get_cpu_flags(void);
+void dav1d_set_cpu_flags_mask(const unsigned mask);
#endif /* __DAV1D_SRC_CPU_H__ */
--- a/src/meson.build
+++ b/src/meson.build
@@ -29,6 +29,7 @@
# libdav1d source files
libdav1d_sources = files(
'picture.c',
+ 'cpu.c',
'data.c',
'ref.c',
'getbits.c',
@@ -73,26 +74,42 @@
)
# ASM specific sources
+libdav1d_nasm_objs = []
if is_asm_enabled
+ if (host_machine.cpu_family() == 'aarch64' or
+ host_machine.cpu_family().startswith('arm'))
- libdav1d_sources += files(
- 'x86/cpu.c',
- )
+ libdav1d_sources += files(
+ 'arm/cpu.c',
+ )
+ libdav1d_tmpl_sources += files(
+ )
+ if host_machine.cpu_family() == 'aarch64'
+ libdav1d_tmpl_sources += files(
+ )
+ elif host_machine.cpu_family().startswith('arm')
+ libdav1d_tmpl_sources += files(
+ )
+ endif
+ elif host_machine.cpu_family().startswith('x86')
- libdav1d_tmpl_sources += files(
- 'x86/mc_init.c',
- )
+ libdav1d_sources += files(
+ 'x86/cpu.c',
+ )
- # NASM source files
- libdav1d_sources_asm = files(
- 'x86/cpuid.asm',
- 'x86/mc.asm',
- )
+ libdav1d_tmpl_sources += files(
+ 'x86/mc_init.c',
+ )
- # Compile the ASM sources with NASM
- libdav1d_nasm_objs = nasm_gen.process(libdav1d_sources_asm)
-else
- libdav1d_nasm_objs = []
+ # NASM source files
+ libdav1d_sources_asm = files(
+ 'x86/cpuid.asm',
+ 'x86/mc.asm',
+ )
+
+ # Compile the ASM sources with NASM
+ libdav1d_nasm_objs = nasm_gen.process(libdav1d_sources_asm)
+ endif
endif
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -32,7 +32,7 @@
void dav1d_cpu_cpuid(uint32_t *info, int leaf);
uint64_t dav1d_cpu_xgetbv(int xcr);
-static unsigned get_cpu_flags(void) {
+unsigned dav1d_get_cpu_flags_x86(void) {
uint32_t info[4] = {0}, n_ids;
unsigned flags = 0;
@@ -64,21 +64,4 @@
}
return flags;
-}
-
-static unsigned flags_mask = -1;
-
-unsigned dav1d_get_cpu_flags(void) {
- static unsigned flags;
- static uint8_t checked = 0;
-
- if (!checked) {
- flags = get_cpu_flags();
- checked = 1;
- }
- return flags & flags_mask;
-}
-
-void dav1d_set_cpu_flags_mask(const unsigned mask) {
- flags_mask = mask;
}
--- a/src/x86/cpu.h
+++ b/src/x86/cpu.h
@@ -40,7 +40,6 @@
DAV1D_X86_CPU_FLAG_AVX512 = 1 << 8, /* F + CD + BW + DQ + VL */
};
-unsigned dav1d_get_cpu_flags(void);
-void dav1d_set_cpu_flags_mask(unsigned mask);
+unsigned dav1d_get_cpu_flags_x86(void);
#endif /* __DAV1D_SRC_X86_CPU_H__ */
--- /dev/null
+++ b/tests/checkasm/arm/checkasm_32.S
@@ -1,0 +1,172 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#define PRIVATE_PREFIX checkasm_
+
+#include "src/arm/asm.S"
+#include "src/arm/32/util.S"
+
+const register_init, align=3
+ .quad 0x21f86d66c8ca00ce
+ .quad 0x75b6ba21077c48ad
+ .quad 0xed56bb2dcb3c7736
+ .quad 0x8bda43d3fd1a7e06
+ .quad 0xb64a9c9e5d318408
+ .quad 0xdf9a54b303f1d3a3
+ .quad 0x4a75479abd64e097
+ .quad 0x249214109d5d1c88
+endconst
+
+const error_message_fpscr
+ .asciz "failed to preserve register FPSCR, changed bits: %x"
+error_message_gpr:
+ .asciz "failed to preserve register r%d"
+error_message_vfp:
+ .asciz "failed to preserve register d%d"
+endconst
+
+@ max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define ARG_STACK 4*(MAX_ARGS - 4)
+
+@ align the used stack space to 8 to preserve the stack alignment
+#define ARG_STACK_A (((ARG_STACK + pushed + 7) & ~7) - pushed)
+
+.macro clobbercheck variant
+.equ pushed, 4*9
+function checked_call_\variant, export=1
+ push {r4-r11, lr} @ 9 words, matching the initial value of pushed
+.ifc \variant, vfp
+ vpush {d8-d15}
+ fmrx r4, FPSCR
+ push {r4} @ keep the entry FPSCR on the stack for the comparison after the call
+.equ pushed, pushed + 16*4 + 4
+.endif
+
+ movrel r12, register_init
+.ifc \variant, vfp
+ vldm r12, {d8-d15} @ fill d8-d15 with the register_init pattern
+.endif
+ ldm r12, {r4-r11} @ fill r4-r11 with the register_init pattern
+
+ sub sp, sp, #ARG_STACK_A
+.equ pos, 0
+.rept MAX_ARGS-4
+ ldr r12, [sp, #ARG_STACK_A + pushed + 8 + pos] @ copy one stack argument of the caller ...
+ str r12, [sp, #pos] @ ... into the argument area of the callee
+.equ pos, pos + 4
+.endr
+
+ mov r12, r0 @ r0 holds the function to call
+ mov r0, r2 @ r2/r3 carry the first two real arguments
+ mov r1, r3
+ ldrd r2, r3, [sp, #ARG_STACK_A + pushed] @ next two arguments come from the stack of the caller
+ blx r12
+ add sp, sp, #ARG_STACK_A
+
+ push {r0, r1} @ keep the return value while the registers are checked
+ movrel r12, register_init
+.ifc \variant, vfp
+.macro check_reg_vfp, dreg, offset
+ ldrd r2, r3, [r12, #8 * (\offset)]
+ vmov r0, lr, \dreg
+ eor r2, r2, r0
+ eor r3, r3, lr
+ orrs r2, r2, r3
+ bne 4f
+.endm
+
+.irp n, 8, 9, 10, 11, 12, 13, 14, 15
+ @ keep track of the checked double/SIMD register
+ mov r1, #\n
+ check_reg_vfp d\n, \n-8
+.endr
+.purgem check_reg_vfp
+
+ fmrx r1, FPSCR
+ ldr r3, [sp, #8] @ entry FPSCR sits just above the saved r0/r1
+ eor r1, r1, r3
+ @ Ignore changes in bits 0-4 and 7
+ bic r1, r1, #0x9f
+ @ Ignore changes in the topmost 5 bits
+ bics r1, r1, #0xf8000000
+ bne 3f
+.endif
+
+ @ keep track of the checked GPR
+ mov r1, #4
+.macro check_reg reg1, reg2=
+ ldrd r2, r3, [r12], #8
+ eors r2, r2, \reg1
+ bne 2f
+ add r1, r1, #1
+.ifnb \reg2
+ eors r3, r3, \reg2
+ bne 2f
+.endif
+ add r1, r1, #1
+.endm
+ check_reg r4, r5
+ check_reg r6, r7
+@ r9 is a volatile register in the ios ABI
+#ifdef __APPLE__
+ check_reg r8
+#else
+ check_reg r8, r9
+#endif
+ check_reg r10, r11
+.purgem check_reg
+
+ b 0f @ every check passed
+4:
+ movrel r0, error_message_vfp
+ b 1f
+3:
+ movrel r0, error_message_fpscr
+ b 1f
+2:
+ movrel r0, error_message_gpr
+1:
+#ifdef PREFIX
+ blx _checkasm_fail_func
+#else
+ blx checkasm_fail_func
+#endif
+0:
+ pop {r0, r1} @ restore the return value of the tested function
+.ifc \variant, vfp
+ pop {r2}
+ fmxr FPSCR, r2 @ restore the entry FPSCR
+ vpop {d8-d15}
+.endif
+ pop {r4-r11, pc}
+endfunc
+.endm
+
+clobbercheck vfp
--- /dev/null
+++ b/tests/checkasm/arm/checkasm_64.S
@@ -1,0 +1,170 @@
+/******************************************************************************
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2015 Martin Storsjo
+ * Copyright © 2015 Janne Grunau
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ * list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#define PRIVATE_PREFIX checkasm_
+
+#include "src/arm/asm.S"
+#include "src/arm/64/util.S"
+
+const register_init, align=4
+ .quad 0x21f86d66c8ca00ce
+ .quad 0x75b6ba21077c48ad
+ .quad 0xed56bb2dcb3c7736
+ .quad 0x8bda43d3fd1a7e06
+ .quad 0xb64a9c9e5d318408
+ .quad 0xdf9a54b303f1d3a3
+ .quad 0x4a75479abd64e097
+ .quad 0x249214109d5d1c88
+ .quad 0x1a1b2550a612b48c
+ .quad 0x79445c159ce79064
+ .quad 0x2eed899d5a28ddcd
+ .quad 0x86b2536fcd8cf636
+ .quad 0xb0856806085e7943
+ .quad 0x3f2bf84fc0fcca4e
+ .quad 0xacbd382dcf5b8de2
+ .quad 0xd229e1f5b281303f
+ .quad 0x71aeaff20b095fd9
+ .quad 0xab63e2e11fa38ed9
+endconst
+
+
+const error_message
+ .asciz "failed to preserve register"
+endconst
+
+
+// max number of args used by any asm function.
+#define MAX_ARGS 15
+
+#define CLOBBER_STACK ((8*MAX_ARGS + 15) & ~15)
+
+function stack_clobber, export=1
+ mov x3, sp // remember sp so it can be restored before returning
+ mov x2, #CLOBBER_STACK
+1:
+ stp x0, x1, [sp, #-16]! // overwrite 16 bytes below sp with the values in x0/x1
+ subs x2, x2, #16
+ b.gt 1b
+ mov sp, x3
+ ret
+endfunc
+
+#define ARG_STACK ((8*(MAX_ARGS - 8) + 15) & ~15)
+
+function checked_call, export=1
+ stp x29, x30, [sp, #-16]!
+ mov x29, sp // x29 is the base for reading the stack arguments of the caller
+ stp x19, x20, [sp, #-16]!
+ stp x21, x22, [sp, #-16]!
+ stp x23, x24, [sp, #-16]!
+ stp x25, x26, [sp, #-16]!
+ stp x27, x28, [sp, #-16]!
+ stp d8, d9, [sp, #-16]!
+ stp d10, d11, [sp, #-16]!
+ stp d12, d13, [sp, #-16]!
+ stp d14, d15, [sp, #-16]!
+
+ movrel x9, register_init // fill d8-d15 and x19-x28 with the register_init pattern
+ ldp d8, d9, [x9], #16
+ ldp d10, d11, [x9], #16
+ ldp d12, d13, [x9], #16
+ ldp d14, d15, [x9], #16
+ ldp x19, x20, [x9], #16
+ ldp x21, x22, [x9], #16
+ ldp x23, x24, [x9], #16
+ ldp x25, x26, [x9], #16
+ ldp x27, x28, [x9], #16
+
+ sub sp, sp, #ARG_STACK
+.equ pos, 0
+.rept MAX_ARGS-8
+ // Skip the first 8 args, that are loaded into registers
+ ldr x9, [x29, #16 + 8*8 + pos]
+ str x9, [sp, #pos]
+.equ pos, pos + 8
+.endr
+
+ mov x12, x0 // x0 holds the function to call
+ ldp x0, x1, [x29, #16] // first eight real arguments are read from the stack of the caller
+ ldp x2, x3, [x29, #32]
+ ldp x4, x5, [x29, #48]
+ ldp x6, x7, [x29, #64]
+ blr x12
+ add sp, sp, #ARG_STACK
+ stp x0, x1, [sp, #-16]! // keep the return value while the registers are checked
+ movrel x9, register_init
+ movi v3.8h, #0 // v3 accumulates differences found in d8-d15
+
+.macro check_reg_neon reg1, reg2
+ ldr q0, [x9], #16
+ uzp1 v1.2d, v\reg1\().2d, v\reg2\().2d
+ eor v0.16b, v0.16b, v1.16b
+ orr v3.16b, v3.16b, v0.16b
+.endm
+ check_reg_neon 8, 9
+ check_reg_neon 10, 11
+ check_reg_neon 12, 13
+ check_reg_neon 14, 15
+ uqxtn v3.8b, v3.8h
+ umov x3, v3.d[0] // x3 is nonzero if any checked SIMD register changed
+
+.macro check_reg reg1, reg2
+ ldp x0, x1, [x9], #16
+ eor x0, x0, \reg1
+ eor x1, x1, \reg2
+ orr x3, x3, x0
+ orr x3, x3, x1
+.endm
+ check_reg x19, x20
+ check_reg x21, x22
+ check_reg x23, x24
+ check_reg x25, x26
+ check_reg x27, x28
+
+ cbz x3, 0f // skip the failure report when nothing changed
+
+ movrel x0, error_message
+#ifdef PREFIX
+ bl _checkasm_fail_func
+#else
+ bl checkasm_fail_func
+#endif
+0:
+ ldp x0, x1, [sp], #16 // restore the return value of the tested function
+ ldp d14, d15, [sp], #16
+ ldp d12, d13, [sp], #16
+ ldp d10, d11, [sp], #16
+ ldp d8, d9, [sp], #16
+ ldp x27, x28, [sp], #16
+ ldp x25, x26, [sp], #16
+ ldp x23, x24, [sp], #16
+ ldp x21, x22, [sp], #16
+ ldp x19, x20, [sp], #16
+ ldp x29, x30, [sp], #16
+ ret
+endfunc
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -73,6 +73,8 @@
{ "AVX", "avx", DAV1D_X86_CPU_FLAG_AVX },
{ "AVX2", "avx2", DAV1D_X86_CPU_FLAG_AVX2 },
{ "AVX-512", "avx512", DAV1D_X86_CPU_FLAG_AVX512 },
+#elif ARCH_AARCH64 || ARCH_ARM
+ { "NEON", "neon", DAV1D_ARM_CPU_FLAG_NEON },
#endif
{ 0 }
};
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -92,6 +92,7 @@
}
#define readtime readtime
#endif
+#endif
/* Verifies that clobbered callee-saved registers
* are properly saved and restored */
@@ -122,13 +123,34 @@
#define declare_new(ret, ...)\
ret (*checked_call)(void *, __VA_ARGS__) = (void *)checkasm_checked_call;
#define call_new(...) checked_call(func_new, __VA_ARGS__)
+#elif ARCH_ARM
+/* Use a dummy argument, to offset the real parameters by 2, not only 1.
+ * This makes sure that potential 8-byte-alignment of parameters is kept
+ * the same even when the extra parameters have been removed. */
+void checkasm_checked_call_vfp(void *func, int dummy, ...);
+#define declare_new(ret, ...)\
+ ret (*checked_call)(void *, int dummy, __VA_ARGS__) =\
+ (void *)checkasm_checked_call_vfp;
+#define call_new(...) checked_call(func_new, 0, __VA_ARGS__)
+#elif ARCH_AARCH64 && !defined(__APPLE__)
+void checkasm_stack_clobber(uint64_t clobber, ...);
+#define declare_new(ret, ...)\
+ ret (*checked_call)(void *, int, int, int, int, int, int, int,\
+ __VA_ARGS__) =\
+ (void *)checkasm_checked_call;
+#define CLOB (UINT64_C(0xdeadbeefdeadbeef))
+#define call_new(...)\
+ (checkasm_stack_clobber(CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+ CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+ CLOB, CLOB, CLOB, CLOB, CLOB, CLOB,\
+ CLOB, CLOB, CLOB, CLOB, CLOB),\
+ checked_call(func_new, 0, 0, 0, 0, 0, 0, 0, __VA_ARGS__))
#endif
-#else
+#else /* HAVE_ASM */
#define declare_new(ret, ...)
/* Call the function */
#define call_new(...) ((func_type *)func_new)(__VA_ARGS__)
-#endif
-#endif
+#endif /* HAVE_ASM */
/* Benchmark the function */
#ifdef readtime
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -49,7 +49,14 @@
checkasm_bitdepth_objs += checkasm_bitdepth_lib.extract_all_objects()
endforeach
- checkasm_nasm_objs = nasm_gen.process(files('checkasm/x86/checkasm.asm'))
+ checkasm_nasm_objs = []
+ if host_machine.cpu_family() == 'aarch64'
+ checkasm_sources += files('checkasm/arm/checkasm_64.S')
+ elif host_machine.cpu_family().startswith('arm')
+ checkasm_sources += files('checkasm/arm/checkasm_32.S')
+ elif host_machine.cpu_family().startswith('x86')
+ checkasm_nasm_objs = nasm_gen.process(files('checkasm/x86/checkasm.asm'))
+ endif
checkasm = executable('checkasm', checkasm_sources, checkasm_nasm_objs,
objects: [checkasm_bitdepth_objs],