shithub: dav1d

Download patch

ref: 9c4d778250524e512ecf501ea994940423cc524e
parent: 8d238cdd06c1e77479b0577cff80efd137b8cbfb
author: David Michael Barr <b@rr-dav.id.au>
date: Wed Oct 10 20:40:15 EDT 2018

Compute DC within chroma-from-luma prediction

Remove the splat of pixels that are then overwritten.
Rename cfl_pred_1 to cfl_pred now that there is only one.

--- a/src/ipred.c
+++ b/src/ipred.c
@@ -68,28 +68,68 @@
 #endif
 }
 
-static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride,
-                           const pixel *const topleft,
-                           const int width, const int height, const int a)
+static NOINLINE void
+cfl_pred(pixel *dst, const ptrdiff_t stride,
+         const int width, const int height, const unsigned dc,
+         const int8_t alpha, const int16_t *ac)
 {
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++) {
+            const int diff = alpha * ac[x];
+            dst[x] = iclip_pixel(dc + apply_sign((abs(diff) + 32) >> 6, diff));
+        }
+        ac += width;
+        dst += PXSTRIDE(stride);
+    }
+}
+
+static unsigned dc_gen_top(const pixel *const topleft, const int width)
+{
     unsigned dc = width >> 1;
     for (int i = 0; i < width; i++)
        dc += topleft[1 + i];
+    return dc >> ctz(width);
+}
 
-    splat_dc(dst, stride, width, height, dc >> ctz(width));
+static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride,
+                           const pixel *const topleft,
+                           const int width, const int height, const int a)
+{
+    splat_dc(dst, stride, width, height, dc_gen_top(topleft, width));
 }
 
-static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride,
+static void ipred_cfl_top_c(pixel *dst, const ptrdiff_t stride,
                             const pixel *const topleft,
-                            const int width, const int height, const int a)
+                            const int width, const int height,
+                            const int8_t alpha, const int16_t *ac)
 {
+    cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), alpha, ac);
+}
+
+static unsigned dc_gen_left(const pixel *const topleft, const int height)
+{
     unsigned dc = height >> 1;
     for (int i = 0; i < height; i++)
        dc += topleft[-(1 + i)];
+    return dc >> ctz(height);
+}
 
-    splat_dc(dst, stride, width, height, dc >> ctz(height));
+static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride,
+                            const pixel *const topleft,
+                            const int width, const int height, const int a)
+{
+    splat_dc(dst, stride, width, height, dc_gen_left(topleft, height));
 }
 
+static void ipred_cfl_left_c(pixel *dst, const ptrdiff_t stride,
+                             const pixel *const topleft,
+                             const int width, const int height,
+                             const int8_t alpha, const int16_t *ac)
+{
+    unsigned dc = dc_gen_left(topleft, height);
+    cfl_pred(dst, stride, width, height, dc, alpha, ac);
+}
+
 #if BITDEPTH == 8
 #define MULTIPLIER_1x2 0x5556
 #define MULTIPLIER_1x4 0x3334
@@ -100,9 +140,8 @@
 #define BASE_SHIFT 17
 #endif
 
-static void ipred_dc_c(pixel *dst, const ptrdiff_t stride,
-                       const pixel *const topleft,
-                       const int width, const int height, const int a)
+static unsigned
+dc_gen(const pixel *const topleft, const int width, const int height)
 {
     unsigned dc = (width + height) >> 1;
     for (int i = 0; i < width; i++)
@@ -116,10 +155,25 @@
                                                            MULTIPLIER_1x2;
         dc >>= BASE_SHIFT;
     }
+    return dc;
+}
 
-    splat_dc(dst, stride, width, height, dc);
+static void ipred_dc_c(pixel *dst, const ptrdiff_t stride,
+                       const pixel *const topleft,
+                       const int width, const int height, const int a)
+{
+    splat_dc(dst, stride, width, height, dc_gen(topleft, width, height));
 }
 
+static void ipred_cfl_c(pixel *dst, const ptrdiff_t stride,
+                        const pixel *const topleft,
+                        const int width, const int height,
+                        const int8_t alpha, const int16_t *ac)
+{
+    unsigned dc = dc_gen(topleft, width, height);
+    cfl_pred(dst, stride, width, height, dc, alpha, ac);
+}
+
 #undef MULTIPLIER_1x2
 #undef MULTIPLIER_1x4
 #undef BASE_SHIFT
@@ -131,6 +185,14 @@
     splat_dc(dst, stride, width, height, 1 << (BITDEPTH - 1));
 }
 
+static void ipred_cfl_128_c(pixel *dst, const ptrdiff_t stride,
+                            const pixel *const topleft,
+                            const int width, const int height,
+                            const int8_t alpha, const int16_t *ac)
+{
+    cfl_pred(dst, stride, width, height, 1 << (BITDEPTH - 1), alpha, ac);
+}
+
 static void ipred_v_c(pixel *dst, const ptrdiff_t stride,
                       const pixel *const topleft,
                       const int width, const int height, const int a)
@@ -620,36 +682,6 @@
 cfl_ac_fn(32, 16, 32, 16, 0, 0, 9)
 cfl_ac_fn(32, 32, 32, 32, 0, 0, 10)
 
-static NOINLINE void
-cfl_pred_1_c(pixel *dst, const ptrdiff_t stride, const int16_t *ac,
-             const int8_t alpha, const int width, const int height)
-{
-    const pixel dc = *dst;
-    for (int y = 0; y < height; y++) {
-        for (int x = 0; x < width; x++) {
-            const int diff = alpha * ac[x];
-            dst[x] = iclip_pixel(dc + apply_sign((abs(diff) + 32) >> 6, diff));
-        }
-        ac += width;
-        dst += PXSTRIDE(stride);
-    }
-}
-
-#define cfl_pred_1_fn(width) \
-static void cfl_pred_1_##width##xN_c(pixel *const dst, \
-                                     const ptrdiff_t stride, \
-                                     const int16_t *const ac, \
-                                     const int8_t alpha, \
-                                     const int height) \
-{ \
-    cfl_pred_1_c(dst, stride, ac, alpha, width, height); \
-}
-
-cfl_pred_1_fn( 4)
-cfl_pred_1_fn( 8)
-cfl_pred_1_fn(16)
-cfl_pred_1_fn(32)
-
 static void pal_pred_c(pixel *dst, const ptrdiff_t stride,
                        const uint16_t *const pal, const uint8_t *idx,
                        const int w, const int h)
@@ -713,10 +745,10 @@
     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][RTX_32X16] = cfl_ac_32x16_to_32x16_c;
     c->cfl_ac[DAV1D_PIXEL_LAYOUT_I444 - 1][ TX_32X32] = cfl_ac_32x32_to_32x32_c;
 
-    c->cfl_pred_1[0] = cfl_pred_1_4xN_c;
-    c->cfl_pred_1[1] = cfl_pred_1_8xN_c;
-    c->cfl_pred_1[2] = cfl_pred_1_16xN_c;
-    c->cfl_pred_1[3] = cfl_pred_1_32xN_c;
+    c->cfl_pred[DC_PRED     ] = ipred_cfl_c;
+    c->cfl_pred[DC_128_PRED ] = ipred_cfl_128_c;
+    c->cfl_pred[TOP_DC_PRED ] = ipred_cfl_top_c;
+    c->cfl_pred[LEFT_DC_PRED] = ipred_cfl_left_c;
 
     c->pal_pred = pal_pred_c;
 
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -59,11 +59,11 @@
  * dst[x,y] += alpha * ac[x,y]
  * - alpha contains a q3 scalar in [-16,16] range;
  */
-#define decl_cfl_pred_1_fn(name) \
-void (name)(pixel *dst, ptrdiff_t stride, \
-            const int16_t *ac, const int8_t alpha, \
-            const int height)
-typedef decl_cfl_pred_1_fn(*cfl_pred_1_fn);
+#define decl_cfl_pred_fn(name) \
+void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
+            int width, int height, const int8_t alpha, \
+            const int16_t *ac)
+typedef decl_cfl_pred_fn(*cfl_pred_fn);
 
 /*
  * dst[x,y] = pal[idx[x,y]]
@@ -79,7 +79,7 @@
 
     // chroma-from-luma
     cfl_ac_fn cfl_ac[3 /* 420, 422, 444 */][N_RECT_TX_SIZES /* chroma tx size */];
-    cfl_pred_1_fn cfl_pred_1[4];
+    cfl_pred_fn cfl_pred[DC_128_PRED + 1];
 
     // palette
     pal_pred_fn pal_pred;
--- a/src/recon.c
+++ b/src/recon.c
@@ -880,14 +880,15 @@
                                                           top_sb_edge, DC_PRED, &angle,
                                                           cfl_uv_t_dim->w,
                                                           cfl_uv_t_dim->h, edge);
-                    dsp->ipred.intra_pred[m](uv_dst[pl], stride, edge,
-                                             cfl_uv_t_dim->w * 4,
-                                             cfl_uv_t_dim->h * 4, 0);
                     if (b->cfl_alpha[pl]) {
-                        dsp->ipred.cfl_pred_1[cfl_uv_t_dim->lw](uv_dst[pl],
-                                                                stride, ac,
-                                                                b->cfl_alpha[pl],
-                                                                cbh4 * 4);
+                        dsp->ipred.cfl_pred[m](uv_dst[pl], stride, edge,
+                                               cfl_uv_t_dim->w * 4,
+                                               cfl_uv_t_dim->h * 4,
+                                               b->cfl_alpha[pl], ac);
+                    } else {
+                        dsp->ipred.intra_pred[m](uv_dst[pl], stride, edge,
+                                                 cfl_uv_t_dim->w * 4,
+                                                 cfl_uv_t_dim->h * 4, 0);
                     }
                 }
                 if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {