shithub: dav1d

Download patch

ref: 2f251bd11528a930934b77e2ee0056d5075a35e6
parent: 4b0683a615a353757ad75c4eb4ee67e12a0aa8ce
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Fri Nov 9 09:13:05 EST 2018

Add a max_width/height argument to angular_ipred_fn

This is used in z2 to limit the number of pixels over which the
filter is applied, as per "numPx" in 7.11.2.4 point 4 in the AV1
specification. This only applies to z2, because in z1/3, the edge
filter is (incomprehensibly) lengthened by the opposite side's edge
length, which undoes the limit on the filter length (like a bug
undoing another bug).

I admit the code is getting rather complex, so we may want to
redesign this to make writing SIMD easier.

--- a/src/ipred.h
+++ b/src/ipred.h
@@ -41,7 +41,7 @@
  */
 #define decl_angular_ipred_fn(name) \
 void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
-            int width, int height, int angle)
+            int width, int height, int angle, int max_width, int max_height)
 typedef decl_angular_ipred_fn(*angular_ipred_fn);
 
 /*
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -83,8 +83,7 @@
     }
 }
 
-static unsigned dc_gen_top(const pixel *const topleft, const int width)
-{
+static unsigned dc_gen_top(const pixel *const topleft, const int width) {
     unsigned dc = width >> 1;
     for (int i = 0; i < width; i++)
        dc += topleft[1 + i];
@@ -93,7 +92,8 @@
 
 static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride,
                            const pixel *const topleft,
-                           const int width, const int height, const int a)
+                           const int width, const int height, const int a,
+                           const int max_width, const int max_height)
 {
     splat_dc(dst, stride, width, height, dc_gen_top(topleft, width));
 }
@@ -106,8 +106,7 @@
     cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), ac, alpha);
 }
 
-static unsigned dc_gen_left(const pixel *const topleft, const int height)
-{
+static unsigned dc_gen_left(const pixel *const topleft, const int height) {
     unsigned dc = height >> 1;
     for (int i = 0; i < height; i++)
        dc += topleft[-(1 + i)];
@@ -116,7 +115,8 @@
 
 static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride,
                             const pixel *const topleft,
-                            const int width, const int height, const int a)
+                            const int width, const int height, const int a,
+                            const int max_width, const int max_height)
 {
     splat_dc(dst, stride, width, height, dc_gen_left(topleft, height));
 }
@@ -140,8 +140,8 @@
 #define BASE_SHIFT 17
 #endif
 
-static unsigned
-dc_gen(const pixel *const topleft, const int width, const int height)
+static unsigned dc_gen(const pixel *const topleft,
+                       const int width, const int height)
 {
     unsigned dc = (width + height) >> 1;
     for (int i = 0; i < width; i++)
@@ -160,7 +160,8 @@
 
 static void ipred_dc_c(pixel *dst, const ptrdiff_t stride,
                        const pixel *const topleft,
-                       const int width, const int height, const int a)
+                       const int width, const int height, const int a,
+                       const int max_width, const int max_height)
 {
     splat_dc(dst, stride, width, height, dc_gen(topleft, width, height));
 }
@@ -180,7 +181,8 @@
 
 static void ipred_dc_128_c(pixel *dst, const ptrdiff_t stride,
                            const pixel *const topleft,
-                           const int width, const int height, const int a)
+                           const int width, const int height, const int a,
+                           const int max_width, const int max_height)
 {
     splat_dc(dst, stride, width, height, 1 << (BITDEPTH - 1));
 }
@@ -195,7 +197,8 @@
 
 static void ipred_v_c(pixel *dst, const ptrdiff_t stride,
                       const pixel *const topleft,
-                      const int width, const int height, const int a)
+                      const int width, const int height, const int a,
+                      const int max_width, const int max_height)
 {
     for (int y = 0; y < height; y++) {
         pixel_copy(dst, topleft + 1, width);
@@ -205,7 +208,8 @@
 
 static void ipred_h_c(pixel *dst, const ptrdiff_t stride,
                       const pixel *const topleft,
-                      const int width, const int height, const int a)
+                      const int width, const int height, const int a,
+                      const int max_width, const int max_height)
 {
     for (int y = 0; y < height; y++) {
         pixel_set(dst, topleft[-(1 + y)], width);
@@ -215,7 +219,8 @@
 
 static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride,
                           const pixel *const tl_ptr,
-                          const int width, const int height, const int a)
+                          const int width, const int height, const int a,
+                          const int max_width, const int max_height)
 {
     const int topleft = tl_ptr[0];
     for (int y = 0; y < height; y++) {
@@ -236,7 +241,8 @@
 
 static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride,
                            const pixel *const topleft,
-                           const int width, const int height, const int a)
+                           const int width, const int height, const int a,
+                           const int max_width, const int max_height)
 {
     const uint8_t *const weights_hor = &dav1d_sm_weights[width];
     const uint8_t *const weights_ver = &dav1d_sm_weights[height];
@@ -256,7 +262,8 @@
 
 static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride,
                              const pixel *const topleft,
-                             const int width, const int height, const int a)
+                             const int width, const int height, const int a,
+                             const int max_width, const int max_height)
 {
     const uint8_t *const weights_ver = &dav1d_sm_weights[height];
     const int bottom = topleft[-height];
@@ -273,7 +280,8 @@
 
 static void ipred_smooth_h_c(pixel *dst, const ptrdiff_t stride,
                              const pixel *const topleft,
-                             const int width, const int height, const int a)
+                             const int width, const int height, const int a,
+                             const int max_width, const int max_height)
 {
     const uint8_t *const weights_hor = &dav1d_sm_weights[width];
     const int right = topleft[width];
@@ -328,7 +336,9 @@
     return strength;
 }
 
-static void filter_edge(pixel *const out, const int sz, const pixel *const in,
+static void filter_edge(pixel *const out, const int sz,
+                        const int lim_from, const int lim_to,
+                        const pixel *const in,
                         const int from, const int to, const unsigned strength)
 {
     static const uint8_t kernel[3][5] = {
@@ -338,12 +348,17 @@
     };
 
     assert(strength > 0);
-    for (int i = 0; i < sz; i++) {
+    int i = 0;
+    for (; i < lim_from; i++)
+        out[i] = in[iclip(i, from, to - 1)];
+    for (; i < imin(lim_to, sz); i++) {
         int s = 0;
         for (int j = 0; j < 5; j++)
             s += in[iclip(i - 2 + j, from, to - 1)] * kernel[strength - 1][j];
         out[i] = (s + 8) >> 4;
     }
+    for (; i < sz; i++)
+        out[i] = in[iclip(i, from, to - 1)];
 }
 
 static int get_upsample(const int blk_wh, const unsigned d, const int type) {
@@ -369,7 +384,8 @@
 
 static void ipred_z1_c(pixel *dst, const ptrdiff_t stride,
                        const pixel *const topleft_in,
-                       const int width, const int height, int angle)
+                       const int width, const int height, int angle,
+                       const int max_width, const int max_height)
 {
     const int is_sm = angle >> 9;
     angle &= 511;
@@ -389,7 +405,7 @@
             get_filter_strength(width + height, 90 - angle, is_sm);
 
         if (filter_strength) {
-            filter_edge(top_out, width + height,
+            filter_edge(top_out, width + height, 0, width + height,
                         &topleft_in[1], -1, width + imin(width, height),
                         filter_strength);
             top = top_out;
@@ -421,7 +437,8 @@
 
 static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
                        const pixel *const topleft_in,
-                       const int width, const int height, int angle)
+                       const int width, const int height, int angle,
+                       const int max_width, const int max_height)
 {
     const int is_sm = angle >> 9;
     angle &= 511;
@@ -440,7 +457,8 @@
             get_filter_strength(width + height, angle - 90, is_sm);
 
         if (filter_strength) {
-            filter_edge(&topleft[1], width, &topleft_in[1], -1, width,
+            filter_edge(&topleft[1], width, 0, max_width,
+                        &topleft_in[1], -1, width,
                         filter_strength);
         } else {
             pixel_copy(&topleft[1], &topleft_in[1], width);
@@ -453,7 +471,8 @@
             get_filter_strength(width + height, 180 - angle, is_sm);
 
         if (filter_strength) {
-            filter_edge(&topleft[-height], height, &topleft_in[-height],
+            filter_edge(&topleft[-height], height, height - max_height, height,
+                        &topleft_in[-height],
                         0, height + 1, filter_strength);
         } else {
             pixel_copy(&topleft[-height], &topleft_in[-height], height);
@@ -492,7 +511,8 @@
 
 static void ipred_z3_c(pixel *dst, const ptrdiff_t stride,
                        const pixel *const topleft_in,
-                       const int width, const int height, int angle)
+                       const int width, const int height, int angle,
+                       const int max_width, const int max_height)
 {
     const int is_sm = angle >> 9;
     angle &= 511;
@@ -513,7 +533,7 @@
             get_filter_strength(width + height, angle - 180, is_sm);
 
         if (filter_strength) {
-            filter_edge(left_out, width + height,
+            filter_edge(left_out, width + height, 0, width + height,
                         &topleft_in[-(width + height)],
                         imax(width - height, 0), width + height + 1,
                         filter_strength);
@@ -548,7 +568,8 @@
 /* Up to 32x32 only */
 static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
                            const pixel *const topleft_in,
-                           const int width, const int height, int filt_idx)
+                           const int width, const int height, int filt_idx,
+                           const int max_width, const int max_height)
 {
     filt_idx &= 511;
     assert(filt_idx < 5);
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -766,7 +766,9 @@
                                                           t_dim->w, t_dim->h, edge);
                     dsp->ipred.intra_pred[m](dst, f->cur.p.stride[0], edge,
                                              t_dim->w * 4, t_dim->h * 4,
-                                             angle | sm_fl);
+                                             angle | sm_fl,
+                                             f->cur.p.p.w - 4 * t->bx,
+                                             f->cur.p.p.h - 4 * t->by);
 
                     if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
                         hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
@@ -981,7 +983,11 @@
                         dsp->ipred.intra_pred[m](dst, stride, edge,
                                                  uv_t_dim->w * 4,
                                                  uv_t_dim->h * 4,
-                                                 angle | sm_uv_fl);
+                                                 angle | sm_uv_fl,
+                                                 (f->cur.p.p.w + ss_hor -
+                                                  4 * (t->bx & ~ss_hor)) >> ss_hor,
+                                                 (f->cur.p.p.w + ss_ver -
+                                                  4 * (t->by & ~ss_ver)) >> ss_ver);
                         if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
                             hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
                                      uv_t_dim->h * 4, 2, "l");
@@ -1136,7 +1142,7 @@
                                                   0, dst, f->cur.p.stride[0], top_sb_edge,
                                                   m, &angle, bw4, bh4, tl_edge);
             dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
-                                     tl_edge, bw4 * 4, bh4 * 4, 0);
+                                     tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0);
             const uint8_t *const ii_mask =
                 b->interintra_type == INTER_INTRA_BLEND ?
                      dav1d_ii_masks[bs][0][b->interintra_mode] :
@@ -1273,7 +1279,7 @@
                                                           top_sb_edge, m,
                                                           &angle, cbw4, cbh4, tl_edge);
                     dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
-                                             tl_edge, cbw4 * 4, cbh4 * 4, 0);
+                                             tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0);
                     dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp,
                                   cbw4 * 4, cbh4 * 4, ii_mask);
                 }
--- a/tests/checkasm/ipred.c
+++ b/tests/checkasm/ipred.c
@@ -68,7 +68,7 @@
     pixel *const topleft = topleft_buf + 128;
 
     declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft,
-                 int width, int height, int angle);
+                 int width, int height, int angle, int max_width, int max_height);
 
     for (int mode = 0; mode < N_IMPL_INTRA_PRED_MODES; mode++)
         for (int w = 4; w <= (mode == FILTER_PRED ? 32 : 64); w <<= 1)
@@ -89,12 +89,13 @@
                     for (int i = -h * 2; i <= w * 2; i++)
                         topleft[i] = rand() & ((1 << BITDEPTH) - 1);
 
-                    call_ref(c_dst, stride, topleft, w, h, a);
-                    call_new(a_dst, stride, topleft, w, h, a);
+                    const int maxw = 1 + (rand() % 128), maxh = 1 + (rand() % 128);
+                    call_ref(c_dst, stride, topleft, w, h, a, maxw, maxh);
+                    call_new(a_dst, stride, topleft, w, h, a, maxw, maxh);
                     if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
                         fail();
 
-                    bench_new(a_dst, stride, topleft, w, h, a);
+                    bench_new(a_dst, stride, topleft, w, h, a, 128, 128);
                 }
             }
     report("intra_pred");