shithub: dav1d

Download patch

ref: 47a581a53d584214b528bcfcbe0898de9149948b
parent: 404a8fbfb95fc1521f40094f2c7c636d21cb56fe
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Tue Nov 6 06:42:15 EST 2018

Change emu_edge function argument order for easier SIMD

--- a/src/mc.h
+++ b/src/mc.h
@@ -87,9 +87,8 @@
 typedef decl_blend_fn(*blend_fn);
 
 #define decl_emu_edge_fn(name) \
-void (name)(pixel *dst, ptrdiff_t dst_stride, \
-            const pixel *src, ptrdiff_t src_stride, \
-            int bw, int bh, int iw, int ih, int x, int y)
+void (name)(intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih, intptr_t x, intptr_t y, \
+            pixel *dst, ptrdiff_t dst_stride, const pixel *src, ptrdiff_t src_stride)
 typedef decl_emu_edge_fn(*emu_edge_fn);
 
 typedef struct Dav1dMCDSPContext {
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -521,11 +521,11 @@
     }
 }
 
-static void emu_edge_c(pixel *dst, const ptrdiff_t dst_stride,
-                       const pixel *ref, const ptrdiff_t ref_stride,
-                       const int bw, const int bh,
-                       const int iw, const int ih,
-                       const int x, const int y)
+static void emu_edge_c(const intptr_t bw, const intptr_t bh,
+                       const intptr_t iw, const intptr_t ih,
+                       const intptr_t x, const intptr_t y,
+                       pixel *dst, const ptrdiff_t dst_stride,
+                       const pixel *ref, const ptrdiff_t ref_stride)
 {
     // find offset in reference of visible block to copy
     ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride) + iclip(x, 0, iw - 1);
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -528,10 +528,10 @@
         dx + bw4 * h_mul + !!mx * 4 > w ||
         dy + bh4 * v_mul + !!my * 4 > h)
     {
-        f->dsp->mc.emu_edge(t->emu_edge, 160 * sizeof(pixel),
-                            refp->p.data[pl], ref_stride,
-                            bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
-                            w, h, dx - !!mx * 3, dy - !!my * 3);
+        f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
+                            w, h, dx - !!mx * 3, dy - !!my * 3,
+                            t->emu_edge, 160 * sizeof(pixel),
+                            refp->p.data[pl], ref_stride);
         ref = &t->emu_edge[160 * !!my * 3 + !!mx * 3];
         ref_stride = 160 * sizeof(pixel);
     } else {
@@ -667,9 +667,9 @@
                 return -1;
             }
             if (dx < 3 || dx + 8 + 4 > width || dy < 3 || dy + 8 + 4 > height) {
-                f->dsp->mc.emu_edge(t->emu_edge, 160 * sizeof(pixel),
-                                    refp->p.data[pl], ref_stride,
-                                    15, 15, width, height, dx - 3, dy - 3);
+                f->dsp->mc.emu_edge(15, 15, width, height, dx - 3, dy - 3,
+                                    t->emu_edge, 160 * sizeof(pixel),
+                                    refp->p.data[pl], ref_stride);
                 ref_ptr = &t->emu_edge[160 * 3 + 3];
                 ref_stride = 160 * sizeof(pixel);
             } else {
--- a/tests/checkasm/mc.c
+++ b/tests/checkasm/mc.c
@@ -346,16 +346,17 @@
 }
 
 static void check_emuedge(Dav1dMCDSPContext *const c) {
-    ALIGN_STK_32(pixel, c_dst, 135 * 160,);
-    ALIGN_STK_32(pixel, a_dst, 135 * 160,);
+    ALIGN_STK_32(pixel, c_dst, 135 * 192,);
+    ALIGN_STK_32(pixel, a_dst, 135 * 192,);
     ALIGN_STK_32(pixel, src,   160 * 160,);
 
     for (int i = 0; i < 160 * 160; i++)
         src[i] = rand() & ((1U << BITDEPTH) - 1);
 
-    declare_func(void, pixel *dst, ptrdiff_t dst_stride,
-                 const pixel *src, ptrdiff_t src_stride,
-                 int bw, int bh, int iw, int ih, int x, int y);
+    declare_func(void, intptr_t bw, intptr_t bh, intptr_t iw, intptr_t ih,
+                 intptr_t x, intptr_t y,
+                 pixel *dst, ptrdiff_t dst_stride,
+                 const pixel *src, ptrdiff_t src_stride);
 
     int x, y, iw, ih;
     for (int w = 4; w <= 128; w <<= 1)
@@ -366,18 +367,18 @@
                     const int bw = w + (rand() & 7);
                     const int bh = h + (rand() & 7);
                     random_offset_for_edge(&x, &y, bw, bh, &iw, &ih, edge);
-                    call_ref(c_dst, 160 * sizeof(pixel), src, 160 * sizeof(pixel),
-                             bw, bh, iw, ih, x, y);
-                    call_new(a_dst, 160 * sizeof(pixel), src, 160 * sizeof(pixel),
-                             bw, bh, iw, ih, x, y);
-                    const int res = cmp2d(c_dst, a_dst, 160 * sizeof(pixel), bw, bh);
+                    call_ref(bw, bh, iw, ih, x, y,
+                             c_dst, 192 * sizeof(pixel), src, 160 * sizeof(pixel));
+                    call_new(bw, bh, iw, ih, x, y,
+                             a_dst, 192 * sizeof(pixel), src, 160 * sizeof(pixel));
+                    const int res = cmp2d(c_dst, a_dst, 192 * sizeof(pixel), bw, bh);
                     if (res != -1) fail();
                 }
             }
             for (enum EdgeFlags edge = 1; edge < 0xf; edge <<= 1) {
                 random_offset_for_edge(&x, &y, w + 7, w + 7, &iw, &ih, edge);
-                bench_new(a_dst, 160 * sizeof(pixel), src, 160 * sizeof(pixel),
-                          w + 7, w + 7, iw, ih, x, y);
+                bench_new(w + 7, w + 7, iw, ih, x, y,
+                          a_dst, 192 * sizeof(pixel), src, 160 * sizeof(pixel));
             }
         }
     report("emu_edge");