ref: 1d5ef8df0d76785bbf47218a179f541151aafe3e
parent: 19ce77e0b9ec60e0a3a748ae9c2c2b417d3b75b5
author: Martin Storsjö <martin@martin.st>
date: Thu Feb 6 04:09:47 EST 2020
arm: cdef: Prepare for 16bpc
--- a/src/arm/32/cdef.S
+++ b/src/arm/32/cdef.S
@@ -148,10 +148,10 @@
.endif
.endm
-// void dav1d_cdef_paddingX_neon(uint16_t *tmp, const pixel *src,
-// ptrdiff_t src_stride, const pixel (*left)[2],
-// const pixel *const top, int h,
-// enum CdefEdgeFlags edges);
+// void dav1d_cdef_paddingX_8bpc_neon(uint16_t *tmp, const pixel *src,
+// ptrdiff_t src_stride, const pixel (*left)[2],
+// const pixel *const top, int h,
+// enum CdefEdgeFlags edges);
// n1 = s0/d0
// w1 = d0/q0
@@ -158,7 +158,7 @@
// n2 = s4/d2
// w2 = d2/q1
.macro padding_func w, stride, n1, w1, n2, w2, align
-function cdef_padding\w\()_neon, export=1
+function cdef_padding\w\()_8bpc_neon, export=1
push {r4-r7,lr}
ldrd r4, r5, [sp, #20]
ldr r6, [sp, #28]
@@ -337,9 +337,10 @@
3:
.endm
-// void dav1d_cdef_filterX_neon(pixel *dst, ptrdiff_t dst_stride,
-// const uint16_t *tmp, int pri_strength,
-// int sec_strength, int dir, int damping, int h);
+// void dav1d_cdef_filterX_8bpc_neon(pixel *dst, ptrdiff_t dst_stride,
+// const uint16_t *tmp, int pri_strength,
+// int sec_strength, int dir, int damping,
+// int h);
.macro filter_func w, pri, sec, min, suffix
function cdef_filter\w\suffix\()_neon
.if \pri
@@ -468,7 +469,7 @@
filter_func \w, pri=0, sec=1, min=0, suffix=_sec
filter_func \w, pri=1, sec=1, min=1, suffix=_pri_sec
-function cdef_filter\w\()_neon, export=1
+function cdef_filter\w\()_8bpc_neon, export=1
push {r4-r9,lr}
vpush {q4-q7}
ldrd r4, r5, [sp, #92]
@@ -496,9 +497,9 @@
.short 420, 210, 140, 105, 105, 105, 105, 105, 140, 210, 420, 0
endconst
-// int dav1d_cdef_find_dir_neon(const pixel *img, const ptrdiff_t stride,
-// unsigned *const var)
-function cdef_find_dir_neon, export=1
+// int dav1d_cdef_find_dir_8bpc_neon(const pixel *img, const ptrdiff_t stride,
+// unsigned *const var)
+function cdef_find_dir_8bpc_neon, export=1
push {lr}
vpush {q4-q7}
sub sp, sp, #32 // cost
--- a/src/arm/64/cdef.S
+++ b/src/arm/64/cdef.S
@@ -137,13 +137,13 @@
.endif
.endm
-// void dav1d_cdef_paddingX_neon(uint16_t *tmp, const pixel *src,
-// ptrdiff_t src_stride, const pixel (*left)[2],
-// const pixel *const top, int h,
-// enum CdefEdgeFlags edges);
+// void dav1d_cdef_paddingX_8bpc_neon(uint16_t *tmp, const pixel *src,
+// ptrdiff_t src_stride, const pixel (*left)[2],
+// const pixel *const top, int h,
+// enum CdefEdgeFlags edges);
.macro padding_func w, stride, rn, rw
-function cdef_padding\w\()_neon, export=1
+function cdef_padding\w\()_8bpc_neon, export=1
movi v30.8h, #0x80, lsl #8
mov v31.16b, v30.16b
sub x0, x0, #2*(2*\stride+2)
@@ -312,9 +312,10 @@
3:
.endm
-// void dav1d_cdef_filterX_neon(pixel *dst, ptrdiff_t dst_stride,
-// const uint16_t *tmp, int pri_strength,
-// int sec_strength, int dir, int damping, int h);
+// void dav1d_cdef_filterX_8bpc_neon(pixel *dst, ptrdiff_t dst_stride,
+// const uint16_t *tmp, int pri_strength,
+// int sec_strength, int dir, int damping,
+// int h);
.macro filter_func w, pri, sec, min, suffix
function cdef_filter\w\suffix\()_neon
.if \pri
@@ -439,7 +440,7 @@
filter_func \w, pri=0, sec=1, min=0, suffix=_sec
filter_func \w, pri=1, sec=1, min=1, suffix=_pri_sec
-function cdef_filter\w\()_neon, export=1
+function cdef_filter\w\()_8bpc_neon, export=1
cbnz w3, 1f // pri_strength
b cdef_filter\w\()_sec_neon // only sec
1:
@@ -461,9 +462,9 @@
.short 420, 210, 140, 105, 105, 105, 105, 105, 140, 210, 420, 0
endconst
-// int dav1d_cdef_find_dir_neon(const pixel *img, const ptrdiff_t stride,
-// unsigned *const var)
-function cdef_find_dir_neon, export=1
+// int dav1d_cdef_find_dir_8bpc_neon(const pixel *img, const ptrdiff_t stride,
+// unsigned *const var)
+function cdef_find_dir_8bpc_neon, export=1
sub sp, sp, #32 // cost
mov w3, #8
movi v31.16b, #128
--- a/src/arm/cdef_init_tmpl.c
+++ b/src/arm/cdef_init_tmpl.c
@@ -28,37 +28,42 @@
#include "src/cdef.h"
#if BITDEPTH == 8
-decl_cdef_dir_fn(dav1d_cdef_find_dir_neon);
+decl_cdef_dir_fn(BF(dav1d_cdef_find_dir, neon));
-void dav1d_cdef_padding4_neon(uint16_t *tmp, const pixel *src,
- ptrdiff_t src_stride, const pixel (*left)[2],
- const pixel *const top, int h,
- enum CdefEdgeFlags edges);
-void dav1d_cdef_padding8_neon(uint16_t *tmp, const pixel *src,
- ptrdiff_t src_stride, const pixel (*left)[2],
- const pixel *const top, int h,
- enum CdefEdgeFlags edges);
+void BF(dav1d_cdef_padding4, neon)(uint16_t *tmp, const pixel *src,
+ ptrdiff_t src_stride, const pixel (*left)[2],
+ const pixel *const top, int h,
+ enum CdefEdgeFlags edges);
+void BF(dav1d_cdef_padding8, neon)(uint16_t *tmp, const pixel *src,
+ ptrdiff_t src_stride, const pixel (*left)[2],
+ const pixel *const top, int h,
+ enum CdefEdgeFlags edges);
-void dav1d_cdef_filter4_neon(pixel *dst, ptrdiff_t dst_stride,
- const uint16_t *tmp, int pri_strength,
- int sec_strength, int dir, int damping, int h);
-void dav1d_cdef_filter8_neon(pixel *dst, ptrdiff_t dst_stride,
- const uint16_t *tmp, int pri_strength,
- int sec_strength, int dir, int damping, int h);
+void BF(dav1d_cdef_filter4, neon)(pixel *dst, ptrdiff_t dst_stride,
+ const uint16_t *tmp, int pri_strength,
+ int sec_strength, int dir, int damping, int h
+ HIGHBD_DECL_SUFFIX);
+void BF(dav1d_cdef_filter8, neon)(pixel *dst, ptrdiff_t dst_stride,
+ const uint16_t *tmp, int pri_strength,
+ int sec_strength, int dir, int damping, int h
+ HIGHBD_DECL_SUFFIX);
#define DEFINE_FILTER(w, h, tmp_stride) \
static void \
-cdef_filter_##w##x##h##_neon(pixel *const dst, const ptrdiff_t stride, \
+cdef_filter_##w##x##h##_neon(pixel *dst, \
+ const ptrdiff_t stride, \
const pixel (*left)[2], const pixel *const top, \
const int pri_strength, const int sec_strength, \
const int dir, const int damping, \
- const enum CdefEdgeFlags edges) \
+ const enum CdefEdgeFlags edges \
+ HIGHBD_DECL_SUFFIX) \
{ \
ALIGN_STK_16(uint16_t, tmp_buf, 12 * tmp_stride + 8,); \
uint16_t *tmp = tmp_buf + 2 * tmp_stride + 8; \
- dav1d_cdef_padding##w##_neon(tmp, dst, stride, left, top, h, edges); \
- dav1d_cdef_filter##w##_neon(dst, stride, tmp, pri_strength, \
- sec_strength, dir, damping, h); \
+ BF(dav1d_cdef_padding##w, neon)(tmp, dst, stride, left, top, h, edges); \
+ BF(dav1d_cdef_filter##w, neon)(dst, stride, tmp, pri_strength, \
+ sec_strength, dir, damping, h \
+ HIGHBD_TAIL_SUFFIX); \
}
DEFINE_FILTER(8, 8, 16)
@@ -73,7 +78,7 @@
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
#if BITDEPTH == 8
- c->dir = dav1d_cdef_find_dir_neon;
+ c->dir = BF(dav1d_cdef_find_dir, neon);
c->fb[0] = cdef_filter_8x8_neon;
c->fb[1] = cdef_filter_4x8_neon;
c->fb[2] = cdef_filter_4x4_neon;