shithub: dav1d

Download patch

ref: 1d5ef8df0d76785bbf47218a179f541151aafe3e
parent: 19ce77e0b9ec60e0a3a748ae9c2c2b417d3b75b5
author: Martin Storsjö <martin@martin.st>
date: Thu Feb 6 04:09:47 EST 2020

arm: cdef: Prepare for 16bpc

--- a/src/arm/32/cdef.S
+++ b/src/arm/32/cdef.S
@@ -148,10 +148,10 @@
 .endif
 .endm
 
-// void dav1d_cdef_paddingX_neon(uint16_t *tmp, const pixel *src,
-//                               ptrdiff_t src_stride, const pixel (*left)[2],
-//                               const pixel *const top, int h,
-//                               enum CdefEdgeFlags edges);
+// void dav1d_cdef_paddingX_8bpc_neon(uint16_t *tmp, const pixel *src,
+//                                    ptrdiff_t src_stride, const pixel (*left)[2],
+//                                    const pixel *const top, int h,
+//                                    enum CdefEdgeFlags edges);
 
 // n1 = s0/d0
 // w1 = d0/q0
@@ -158,7 +158,7 @@
 // n2 = s4/d2
 // w2 = d2/q1
 .macro padding_func w, stride, n1, w1, n2, w2, align
-function cdef_padding\w\()_neon, export=1
+function cdef_padding\w\()_8bpc_neon, export=1
         push            {r4-r7,lr}
         ldrd            r4,  r5,  [sp, #20]
         ldr             r6,  [sp, #28]
@@ -337,9 +337,10 @@
 3:
 .endm
 
-// void dav1d_cdef_filterX_neon(pixel *dst, ptrdiff_t dst_stride,
-//                              const uint16_t *tmp, int pri_strength,
-//                              int sec_strength, int dir, int damping, int h);
+// void dav1d_cdef_filterX_8bpc_neon(pixel *dst, ptrdiff_t dst_stride,
+//                                   const uint16_t *tmp, int pri_strength,
+//                                   int sec_strength, int dir, int damping,
+//                                   int h);
 .macro filter_func w, pri, sec, min, suffix
 function cdef_filter\w\suffix\()_neon
 .if \pri
@@ -468,7 +469,7 @@
 filter_func \w, pri=0, sec=1, min=0, suffix=_sec
 filter_func \w, pri=1, sec=1, min=1, suffix=_pri_sec
 
-function cdef_filter\w\()_neon, export=1
+function cdef_filter\w\()_8bpc_neon, export=1
         push            {r4-r9,lr}
         vpush           {q4-q7}
         ldrd            r4,  r5,  [sp, #92]
@@ -496,9 +497,9 @@
         .short         420, 210, 140, 105, 105, 105, 105, 105, 140, 210, 420, 0
 endconst
 
-// int dav1d_cdef_find_dir_neon(const pixel *img, const ptrdiff_t stride,
-//                              unsigned *const var)
-function cdef_find_dir_neon, export=1
+// int dav1d_cdef_find_dir_8bpc_neon(const pixel *img, const ptrdiff_t stride,
+//                                   unsigned *const var)
+function cdef_find_dir_8bpc_neon, export=1
         push            {lr}
         vpush           {q4-q7}
         sub             sp,  sp,  #32          // cost
--- a/src/arm/64/cdef.S
+++ b/src/arm/64/cdef.S
@@ -137,13 +137,13 @@
 .endif
 .endm
 
-// void dav1d_cdef_paddingX_neon(uint16_t *tmp, const pixel *src,
-//                               ptrdiff_t src_stride, const pixel (*left)[2],
-//                               const pixel *const top, int h,
-//                               enum CdefEdgeFlags edges);
+// void dav1d_cdef_paddingX_8bpc_neon(uint16_t *tmp, const pixel *src,
+//                                    ptrdiff_t src_stride, const pixel (*left)[2],
+//                                    const pixel *const top, int h,
+//                                    enum CdefEdgeFlags edges);
 
 .macro padding_func w, stride, rn, rw
-function cdef_padding\w\()_neon, export=1
+function cdef_padding\w\()_8bpc_neon, export=1
         movi            v30.8h,  #0x80, lsl #8
         mov             v31.16b, v30.16b
         sub             x0,  x0,  #2*(2*\stride+2)
@@ -312,9 +312,10 @@
 3:
 .endm
 
-// void dav1d_cdef_filterX_neon(pixel *dst, ptrdiff_t dst_stride,
-//                              const uint16_t *tmp, int pri_strength,
-//                              int sec_strength, int dir, int damping, int h);
+// void dav1d_cdef_filterX_8bpc_neon(pixel *dst, ptrdiff_t dst_stride,
+//                                   const uint16_t *tmp, int pri_strength,
+//                                   int sec_strength, int dir, int damping,
+//                                   int h);
 .macro filter_func w, pri, sec, min, suffix
 function cdef_filter\w\suffix\()_neon
 .if \pri
@@ -439,7 +440,7 @@
 filter_func \w, pri=0, sec=1, min=0, suffix=_sec
 filter_func \w, pri=1, sec=1, min=1, suffix=_pri_sec
 
-function cdef_filter\w\()_neon, export=1
+function cdef_filter\w\()_8bpc_neon, export=1
         cbnz            w3,  1f // pri_strength
         b               cdef_filter\w\()_sec_neon // only sec
 1:
@@ -461,9 +462,9 @@
         .short         420, 210, 140, 105, 105, 105, 105, 105, 140, 210, 420, 0
 endconst
 
-// int dav1d_cdef_find_dir_neon(const pixel *img, const ptrdiff_t stride,
-//                              unsigned *const var)
-function cdef_find_dir_neon, export=1
+// int dav1d_cdef_find_dir_8bpc_neon(const pixel *img, const ptrdiff_t stride,
+//                                   unsigned *const var)
+function cdef_find_dir_8bpc_neon, export=1
         sub             sp,  sp,  #32 // cost
         mov             w3,  #8
         movi            v31.16b, #128
--- a/src/arm/cdef_init_tmpl.c
+++ b/src/arm/cdef_init_tmpl.c
@@ -28,37 +28,42 @@
 #include "src/cdef.h"
 
 #if BITDEPTH == 8
-decl_cdef_dir_fn(dav1d_cdef_find_dir_neon);
+decl_cdef_dir_fn(BF(dav1d_cdef_find_dir, neon));
 
-void dav1d_cdef_padding4_neon(uint16_t *tmp, const pixel *src,
-                              ptrdiff_t src_stride, const pixel (*left)[2],
-                              const pixel *const top, int h,
-                              enum CdefEdgeFlags edges);
-void dav1d_cdef_padding8_neon(uint16_t *tmp, const pixel *src,
-                              ptrdiff_t src_stride, const pixel (*left)[2],
-                              const pixel *const top, int h,
-                              enum CdefEdgeFlags edges);
+void BF(dav1d_cdef_padding4, neon)(uint16_t *tmp, const pixel *src,
+                                   ptrdiff_t src_stride, const pixel (*left)[2],
+                                   const pixel *const top, int h,
+                                   enum CdefEdgeFlags edges);
+void BF(dav1d_cdef_padding8, neon)(uint16_t *tmp, const pixel *src,
+                                   ptrdiff_t src_stride, const pixel (*left)[2],
+                                   const pixel *const top, int h,
+                                   enum CdefEdgeFlags edges);
 
-void dav1d_cdef_filter4_neon(pixel *dst, ptrdiff_t dst_stride,
-                             const uint16_t *tmp, int pri_strength,
-                             int sec_strength, int dir, int damping, int h);
-void dav1d_cdef_filter8_neon(pixel *dst, ptrdiff_t dst_stride,
-                             const uint16_t *tmp, int pri_strength,
-                             int sec_strength, int dir, int damping, int h);
+void BF(dav1d_cdef_filter4, neon)(pixel *dst, ptrdiff_t dst_stride,
+                                  const uint16_t *tmp, int pri_strength,
+                                  int sec_strength, int dir, int damping, int h
+                                  HIGHBD_DECL_SUFFIX);
+void BF(dav1d_cdef_filter8, neon)(pixel *dst, ptrdiff_t dst_stride,
+                                  const uint16_t *tmp, int pri_strength,
+                                  int sec_strength, int dir, int damping, int h
+                                  HIGHBD_DECL_SUFFIX);
 
 #define DEFINE_FILTER(w, h, tmp_stride)                                      \
 static void                                                                  \
-cdef_filter_##w##x##h##_neon(pixel *const dst, const ptrdiff_t stride,       \
+cdef_filter_##w##x##h##_neon(pixel *dst,                                     \
+                             const ptrdiff_t stride,                         \
                              const pixel (*left)[2], const pixel *const top, \
                              const int pri_strength, const int sec_strength, \
                              const int dir, const int damping,               \
-                             const enum CdefEdgeFlags edges)                 \
+                             const enum CdefEdgeFlags edges                  \
+                             HIGHBD_DECL_SUFFIX)                             \
 {                                                                            \
     ALIGN_STK_16(uint16_t, tmp_buf, 12 * tmp_stride + 8,);                   \
     uint16_t *tmp = tmp_buf + 2 * tmp_stride + 8;                            \
-    dav1d_cdef_padding##w##_neon(tmp, dst, stride, left, top, h, edges);     \
-    dav1d_cdef_filter##w##_neon(dst, stride, tmp, pri_strength,              \
-                                sec_strength, dir, damping, h);              \
+    BF(dav1d_cdef_padding##w, neon)(tmp, dst, stride, left, top, h, edges);  \
+    BF(dav1d_cdef_filter##w, neon)(dst, stride, tmp, pri_strength,           \
+                                   sec_strength, dir, damping, h             \
+                                   HIGHBD_TAIL_SUFFIX);                      \
 }
 
 DEFINE_FILTER(8, 8, 16)
@@ -73,7 +78,7 @@
     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
 
 #if BITDEPTH == 8
-    c->dir = dav1d_cdef_find_dir_neon;
+    c->dir = BF(dav1d_cdef_find_dir, neon);
     c->fb[0] = cdef_filter_8x8_neon;
     c->fb[1] = cdef_filter_4x8_neon;
     c->fb[2] = cdef_filter_4x4_neon;