shithub: dav1d

Download patch

ref: e10b855c530a3a5ed519fcb3f3ec085a6e0aa79e
parent: d401106b2a764422d6233cb2f5f3d90a8e5883c4
author: David Michael Barr <b@rr-dav.id.au>
date: Fri Nov 9 14:41:04 EST 2018

Pass dimensions to cfl_ac and derive log2sz

--- a/src/ipred.h
+++ b/src/ipred.h
@@ -52,7 +52,7 @@
  */
 #define decl_cfl_ac_fn(name) \
 void (name)(int16_t *ac, const pixel *y, ptrdiff_t stride, \
-            int w_pad, int h_pad)
+            int w_pad, int h_pad, int cw, int ch)
 typedef decl_cfl_ac_fn(*cfl_ac_fn);
 
 /*
@@ -77,7 +77,7 @@
     angular_ipred_fn intra_pred[N_IMPL_INTRA_PRED_MODES];
 
     // chroma-from-luma
-    cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
+    cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */];
     cfl_pred_fn cfl_pred[DC_128_PRED + 1];
 
     // palette
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -614,7 +614,7 @@
 static NOINLINE void
 cfl_ac_c(int16_t *ac, const pixel *ypx, const ptrdiff_t stride,
          const int w_pad, const int h_pad, const int width, const int height,
-         const int ss_hor, const int ss_ver, const int log2sz)
+         const int ss_hor, const int ss_ver)
 {
     int y, x;
     int16_t *const ac_orig = ac;
@@ -642,6 +642,7 @@
         ac += width;
     }
 
+    const int log2sz = ctz(width) + ctz(height);
     int sum = (1 << log2sz) >> 1;
     for (ac = ac_orig, y = 0; y < height; y++) {
         for (x = 0; x < width; x++)
@@ -658,50 +659,18 @@
     }
 }
 
-#define cfl_ac_fn(lw, lh, cw, ch, ss_hor, ss_ver, log2sz) \
-static void cfl_ac_##lw##x##lh##_to_##cw##x##ch##_c(int16_t *const ac, \
-                                                    const pixel *const ypx, \
-                                                    const ptrdiff_t stride, \
-                                                    const int w_pad, \
-                                                    const int h_pad) \
+#define cfl_ac_fn(fmt, ss_hor, ss_ver) \
+static void cfl_ac_##fmt##_c(int16_t *const ac, const pixel *const ypx, \
+                             const ptrdiff_t stride, const int w_pad, \
+                             const int h_pad, const int cw, const int ch) \
 { \
-    cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver, log2sz); \
+    cfl_ac_c(ac, ypx, stride, w_pad, h_pad, cw, ch, ss_hor, ss_ver); \
 }
 
-cfl_ac_fn( 8,  8,  4,  4, 1, 1, 4)
-cfl_ac_fn( 8, 16,  4,  8, 1, 1, 5)
-cfl_ac_fn( 8, 32,  4, 16, 1, 1, 6)
-cfl_ac_fn(16,  8,  8,  4, 1, 1, 5)
-cfl_ac_fn(16, 16,  8,  8, 1, 1, 6)
-cfl_ac_fn(16, 32,  8, 16, 1, 1, 7)
-cfl_ac_fn(32,  8, 16,  4, 1, 1, 6)
-cfl_ac_fn(32, 16, 16,  8, 1, 1, 7)
-cfl_ac_fn(32, 32, 16, 16, 1, 1, 8)
+cfl_ac_fn(420, 1, 1)
+cfl_ac_fn(422, 1, 0)
+cfl_ac_fn(444, 0, 0)
 
-cfl_ac_fn( 8,  4,  4,  4, 1, 0, 4)
-cfl_ac_fn( 8,  8,  4,  8, 1, 0, 5)
-cfl_ac_fn(16,  4,  8,  4, 1, 0, 5)
-cfl_ac_fn(16,  8,  8,  8, 1, 0, 6)
-cfl_ac_fn(16, 16,  8, 16, 1, 0, 7)
-cfl_ac_fn(32,  8, 16,  8, 1, 0, 7)
-cfl_ac_fn(32, 16, 16, 16, 1, 0, 8)
-cfl_ac_fn(32, 32, 16, 32, 1, 0, 9)
-
-cfl_ac_fn( 4,  4,  4,  4, 0, 0, 4)
-cfl_ac_fn( 4,  8,  4,  8, 0, 0, 5)
-cfl_ac_fn( 4, 16,  4, 16, 0, 0, 6)
-cfl_ac_fn( 8,  4,  8,  4, 0, 0, 5)
-cfl_ac_fn( 8,  8,  8,  8, 0, 0, 6)
-cfl_ac_fn( 8, 16,  8, 16, 0, 0, 7)
-cfl_ac_fn( 8, 32,  8, 32, 0, 0, 8)
-cfl_ac_fn(16,  4, 16,  4, 0, 0, 6)
-cfl_ac_fn(16,  8, 16,  8, 0, 0, 7)
-cfl_ac_fn(16, 16, 16, 16, 0, 0, 8)
-cfl_ac_fn(16, 32, 16, 32, 0, 0, 9)
-cfl_ac_fn(32,  8, 32,  8, 0, 0, 8)
-cfl_ac_fn(32, 16, 32, 16, 0, 0, 9)
-cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
-
 static void pal_pred_c(pixel *dst, const ptrdiff_t stride,
                        const uint16_t *const pal, const uint8_t *idx,
                        const int w, const int h)
@@ -730,40 +699,9 @@
     c->intra_pred[Z3_PRED      ] = ipred_z3_c;
     c->intra_pred[FILTER_PRED  ] = ipred_filter_c;
 
-    // cfl functions are split per chroma subsampling type
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_4X4  ] = cfl_ac_8x8_to_4x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X8  ] = cfl_ac_8x16_to_4x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_4X16 ] = cfl_ac_8x32_to_4x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X4  ] = cfl_ac_16x8_to_8x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_8X8  ] = cfl_ac_16x16_to_8x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_8X16 ] = cfl_ac_16x32_to_8x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X4 ] = cfl_ac_32x8_to_16x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][RTX_16X8 ] = cfl_ac_32x16_to_16x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1][ TX_16X16] = cfl_ac_32x32_to_16x16_c;
-
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_4X4  ] = cfl_ac_8x4_to_4x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_4X8  ] = cfl_ac_8x8_to_4x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X4  ] = cfl_ac_16x4_to_8x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_8X8  ] = cfl_ac_16x8_to_8x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_8X16 ] = cfl_ac_16x16_to_8x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X8 ] = cfl_ac_32x8_to_16x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][ TX_16X16] = cfl_ac_32x16_to_16x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1][RTX_16X32] = cfl_ac_32x32_to_16x32_c;
-
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_4X4  ] = cfl_ac_4x4_to_4x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X8  ] = cfl_ac_4x8_to_4x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_4X16 ] = cfl_ac_4x16_to_4x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X4  ] = cfl_ac_8x4_to_8x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_8X8  ] = cfl_ac_8x8_to_8x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X16 ] = cfl_ac_8x16_to_8x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_8X32 ] = cfl_ac_8x32_to_8x32_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X4 ] = cfl_ac_16x4_to_16x4_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X8 ] = cfl_ac_16x8_to_16x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_16X16] = cfl_ac_16x16_to_16x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_16X32] = cfl_ac_16x32_to_16x32_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X8 ] = cfl_ac_32x8_to_32x8_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
-    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I420 - 1] = cfl_ac_420_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I422 - 1] = cfl_ac_422_c;
+    c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1] = cfl_ac_444_c;
 
     c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
     c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -856,10 +856,10 @@
                     ((cw4 << ss_hor) + t_dim->w - 1) & ~(t_dim->w - 1);
                 const int furthest_b =
                     ((ch4 << ss_ver) + t_dim->h - 1) & ~(t_dim->h - 1);
-                dsp->ipred.cfl_ac[f->cur.p.p.layout - 1]
-                                 [b->uvtx](ac, y_src, f->cur.p.stride[0],
-                                           cbw4 - (furthest_r >> ss_hor),
-                                           cbh4 - (furthest_b >> ss_ver));
+                dsp->ipred.cfl_ac[f->cur.p.p.layout - 1](ac, y_src, f->cur.p.stride[0],
+                                                         cbw4 - (furthest_r >> ss_hor),
+                                                         cbh4 - (furthest_b >> ss_ver),
+                                                         cbw4 * 4, cbh4 * 4);
                 for (int pl = 0; pl < 2; pl++) {
                     if (!b->cfl_alpha[pl]) continue;
                     int angle = 0;