shithub: dav1d

Download patch

ref: ebbf91f44422c7a2778a9e524a9cc7a1b5c66dcf
parent: ac492552babb8c7916a9347768f4dcacbb47dd42
author: Martin Storsjö <martin@martin.st>
date: Mon Feb 17 19:13:43 EST 2020

arm: loopfilter: Prepare for 16 bpc

--- a/src/arm/32/loopfilter.S
+++ b/src/arm/32/loopfilter.S
@@ -734,13 +734,13 @@
         bx              r12
 endfunc
 
-// void dav1d_lpf_v_sb_y_neon(pixel *dst, const ptrdiff_t stride,
-//                            const uint32_t *const vmask,
-//                            const uint8_t (*l)[4], ptrdiff_t b4_stride,
-//                            const Av1FilterLUT *lut, const int w)
+// void dav1d_lpf_v_sb_y_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                                 const uint32_t *const vmask,
+//                                 const uint8_t (*l)[4], ptrdiff_t b4_stride,
+//                                 const Av1FilterLUT *lut, const int w)
 
 .macro lpf_func dir, type
-function lpf_\dir\()_sb_\type\()_neon, export=1
+function lpf_\dir\()_sb_\type\()_8bpc_neon, export=1
         push            {r4-r11,lr}
         vpush           {q4-q7}
         ldrd            r4,  r5,  [sp, #100]
--- a/src/arm/64/loopfilter.S
+++ b/src/arm/64/loopfilter.S
@@ -981,13 +981,13 @@
         br              x15
 endfunc
 
-// void dav1d_lpf_v_sb_y_neon(pixel *dst, const ptrdiff_t stride,
-//                            const uint32_t *const vmask,
-//                            const uint8_t (*l)[4], ptrdiff_t b4_stride,
-//                            const Av1FilterLUT *lut, const int w)
+// void dav1d_lpf_v_sb_y_8bpc_neon(pixel *dst, const ptrdiff_t stride,
+//                                 const uint32_t *const vmask,
+//                                 const uint8_t (*l)[4], ptrdiff_t b4_stride,
+//                                 const Av1FilterLUT *lut, const int w)
 
 .macro lpf_func dir, type
-function lpf_\dir\()_sb_\type\()_neon, export=1
+function lpf_\dir\()_sb_\type\()_8bpc_neon, export=1
         mov             x11, x30
         stp             d8,  d9,  [sp, #-0x40]!
         stp             d10, d11, [sp, #0x10]
--- a/src/arm/loopfilter_init_tmpl.c
+++ b/src/arm/loopfilter_init_tmpl.c
@@ -28,10 +28,10 @@
 #include "src/cpu.h"
 #include "src/loopfilter.h"
 
-decl_loopfilter_sb_fn(dav1d_lpf_h_sb_y_neon);
-decl_loopfilter_sb_fn(dav1d_lpf_v_sb_y_neon);
-decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_neon);
-decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_neon);
+decl_loopfilter_sb_fn(BF(dav1d_lpf_h_sb_y, neon));
+decl_loopfilter_sb_fn(BF(dav1d_lpf_v_sb_y, neon));
+decl_loopfilter_sb_fn(BF(dav1d_lpf_h_sb_uv, neon));
+decl_loopfilter_sb_fn(BF(dav1d_lpf_v_sb_uv, neon));
 
 COLD void bitfn(dav1d_loop_filter_dsp_init_arm)(Dav1dLoopFilterDSPContext *const c) {
     const unsigned flags = dav1d_get_cpu_flags();
@@ -39,9 +39,9 @@
     if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
 
 #if BITDEPTH == 8
-    c->loop_filter_sb[0][0] = dav1d_lpf_h_sb_y_neon;
-    c->loop_filter_sb[0][1] = dav1d_lpf_v_sb_y_neon;
-    c->loop_filter_sb[1][0] = dav1d_lpf_h_sb_uv_neon;
-    c->loop_filter_sb[1][1] = dav1d_lpf_v_sb_uv_neon;
+    c->loop_filter_sb[0][0] = BF(dav1d_lpf_h_sb_y, neon);
+    c->loop_filter_sb[0][1] = BF(dav1d_lpf_v_sb_y, neon);
+    c->loop_filter_sb[1][0] = BF(dav1d_lpf_h_sb_uv, neon);
+    c->loop_filter_sb[1][1] = BF(dav1d_lpf_v_sb_uv, neon);
 #endif
 }