shithub: dav1d

--- a/src/internal.h

+++ b/src/internal.h

@@ -278,7 +278,7 @@

         uint8_t *pal_idx;

         int16_t *ac;

         pixel *interintra, *lap;

-        coef *compinter;

+        int16_t *compinter;

     } scratch;

     ALIGN(uint8_t scratch_seg_mask[128 * 128], 32);

--- a/src/lib.c

+++ b/src/lib.c

@@ -124,7 +124,7 @@

             t->f = f;

             t->cf = dav1d_alloc_aligned(32 * 32 * sizeof(int32_t), 32);

             if (!t->cf) goto error;

-            t->scratch.mem = dav1d_alloc_aligned(128 * 128 * 8, 32);

+            t->scratch.mem = dav1d_alloc_aligned(128 * 128 * 4, 32);

             if (!t->scratch.mem) goto error;

             memset(t->cf, 0, 32 * 32 * sizeof(int32_t));

             t->emu_edge =

--- a/src/mc.h

+++ b/src/mc.h

@@ -54,17 +54,17 @@

 typedef decl_warp8x8_fn(*warp8x8_fn);

 #define decl_mct_fn(name) \

-void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \

+void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \

             int w, int h, int mx, int my)

 typedef decl_mct_fn(*mct_fn);

 #define decl_mct_scaled_fn(name) \

-void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \

+void (name)(int16_t *tmp, const pixel *src, ptrdiff_t src_stride, \

             int w, int h, int mx, int my, int dx, int dy)

 typedef decl_mct_scaled_fn(*mct_scaled_fn);

 #define decl_warp8x8t_fn(name) \

-void (name)(coef *tmp, const ptrdiff_t tmp_stride, \

+void (name)(int16_t *tmp, const ptrdiff_t tmp_stride, \

             const pixel *src, ptrdiff_t src_stride, \

             const int16_t *abcd, int mx, int my)

 typedef decl_warp8x8t_fn(*warp8x8t_fn);

@@ -71,23 +71,23 @@

 #define decl_avg_fn(name) \

 void (name)(pixel *dst, ptrdiff_t dst_stride, \

-            const coef *tmp1, const coef *tmp2, int w, int h)

+            const int16_t *tmp1, const int16_t *tmp2, int w, int h)

 typedef decl_avg_fn(*avg_fn);

 #define decl_w_avg_fn(name) \

 void (name)(pixel *dst, ptrdiff_t dst_stride, \

-            const coef *tmp1, const coef *tmp2, int w, int h, int weight)

+            const int16_t *tmp1, const int16_t *tmp2, int w, int h, int weight)

 typedef decl_w_avg_fn(*w_avg_fn);

 #define decl_mask_fn(name) \

 void (name)(pixel *dst, ptrdiff_t dst_stride, \

-            const coef *tmp1, const coef *tmp2, int w, int h, \

+            const int16_t *tmp1, const int16_t *tmp2, int w, int h, \

             const uint8_t *mask)

 typedef decl_mask_fn(*mask_fn);

 #define decl_w_mask_fn(name) \

 void (name)(pixel *dst, ptrdiff_t dst_stride, \

-            const coef *tmp1, const coef *tmp2, int w, int h, \

+            const int16_t *tmp1, const int16_t *tmp2, int w, int h, \

             uint8_t *mask, int sign)

 typedef decl_w_mask_fn(*w_mask_fn);

--- a/src/mc_tmpl.c

+++ b/src/mc_tmpl.c

@@ -50,7 +50,7 @@

 static NOINLINE void

-prep_c(coef *tmp, const pixel *src, const ptrdiff_t src_stride,

+prep_c(int16_t *tmp, const pixel *src, const ptrdiff_t src_stride,

        const int w, int h)

     do {

@@ -105,7 +105,7 @@

     if (fh) {

         if (fv) {

             int tmp_h = h + 7;

-            coef mid[128 * 135], *mid_ptr = mid;

+            int16_t mid[128 * 135], *mid_ptr = mid;

             src -= src_stride * 3;

             do {

@@ -154,7 +154,7 @@

                   const int dx, const int dy, const int filter_type)

     int tmp_h = (((h - 1) * dy + my) >> 10) + 8;

-    coef mid[128 * (256 + 7)], *mid_ptr = mid;

+    int16_t mid[128 * (256 + 7)], *mid_ptr = mid;

     src_stride = PXSTRIDE(src_stride);

     src -= src_stride * 3;

@@ -191,7 +191,7 @@

 static NOINLINE void

-prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,

+prep_8tap_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,

             const int w, int h, const int mx, const int my,

             const int filter_type)

@@ -201,7 +201,7 @@

     if (fh) {

         if (fv) {

             int tmp_h = h + 7;

-            coef mid[128 * 135], *mid_ptr = mid;

+            int16_t mid[128 * 135], *mid_ptr = mid;

             src -= src_stride * 3;

             do {

@@ -242,12 +242,12 @@

 static NOINLINE void

-prep_8tap_scaled_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,

+prep_8tap_scaled_c(int16_t *tmp, const pixel *src, ptrdiff_t src_stride,

                    const int w, int h, const int mx, int my,

                    const int dx, const int dy, const int filter_type)

     int tmp_h = (((h - 1) * dy + my) >> 10) + 8;

-    coef mid[128 * (256 + 7)], *mid_ptr = mid;

+    int16_t mid[128 * (256 + 7)], *mid_ptr = mid;

     src_stride = PXSTRIDE(src_stride);

     src -= src_stride * 3;

@@ -304,7 +304,7 @@

     put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \

                       type_h | (type_v << 2)); \

} \

-static void prep_8tap_##type##_c(coef *const tmp, \

+static void prep_8tap_##type##_c(int16_t *const tmp, \

                                  const pixel *const src, \

                                  const ptrdiff_t src_stride, \

                                  const int w, const int h, \

@@ -313,7 +313,7 @@

     prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \

                 type_h | (type_v << 2)); \

} \

-static void prep_8tap_##type##_scaled_c(coef *const tmp, \

+static void prep_8tap_##type##_scaled_c(int16_t *const tmp, \

                                         const pixel *const src, \

                                         const ptrdiff_t src_stride, \

                                         const int w, const int h, \

@@ -352,7 +352,7 @@

     if (mx) {

         if (my) {

-            coef mid[128 * 129], *mid_ptr = mid;

+            int16_t mid[128 * 129], *mid_ptr = mid;

             int tmp_h = h + 1;

             do {

@@ -398,7 +398,7 @@

                                const int dx, const int dy)

     int tmp_h = (((h - 1) * dy + my) >> 10) + 2;

-    coef mid[128 * (256 + 1)], *mid_ptr = mid;

+    int16_t mid[128 * (256 + 1)], *mid_ptr = mid;

     do {

         int x;

@@ -429,7 +429,7 @@

     } while (--h);

-static void prep_bilin_c(coef *tmp,

+static void prep_bilin_c(int16_t *tmp,

                          const pixel *src, ptrdiff_t src_stride,

                          const int w, int h, const int mx, const int my)

@@ -437,7 +437,7 @@

     if (mx) {

         if (my) {

-            coef mid[128 * 129], *mid_ptr = mid;

+            int16_t mid[128 * 129], *mid_ptr = mid;

             int tmp_h = h + 1;

             do {

@@ -477,13 +477,13 @@

         prep_c(tmp, src, src_stride, w, h);

-static void prep_bilin_scaled_c(coef *tmp,

+static void prep_bilin_scaled_c(int16_t *tmp,

                                 const pixel *src, ptrdiff_t src_stride,

                                 const int w, int h, const int mx, int my,

                                 const int dx, const int dy)

     int tmp_h = (((h - 1) * dy + my) >> 10) + 2;

-    coef mid[128 * (256 + 1)], *mid_ptr = mid;

+    int16_t mid[128 * (256 + 1)], *mid_ptr = mid;

     do {

         int x;

@@ -515,7 +515,7 @@

 static void avg_c(pixel *dst, const ptrdiff_t dst_stride,

-                  const coef *tmp1, const coef *tmp2, const int w, int h)

+                  const int16_t *tmp1, const int16_t *tmp2, const int w, int h)

     do {

         for (int x = 0; x < w; x++)

@@ -528,7 +528,7 @@

 static void w_avg_c(pixel *dst, const ptrdiff_t dst_stride,

-                    const coef *tmp1, const coef *tmp2, const int w, int h,

+                    const int16_t *tmp1, const int16_t *tmp2, const int w, int h,

                     const int weight)

     do {

@@ -543,7 +543,7 @@

 static void mask_c(pixel *dst, const ptrdiff_t dst_stride,

-                   const coef *tmp1, const coef *tmp2, const int w, int h,

+                   const int16_t *tmp1, const int16_t *tmp2, const int w, int h,

                    const uint8_t *mask)

     do {

@@ -601,7 +601,7 @@

 static void w_mask_c(pixel *dst, const ptrdiff_t dst_stride,

-                     const coef *tmp1, const coef *tmp2, const int w, int h,

+                     const int16_t *tmp1, const int16_t *tmp2, const int w, int h,

                      uint8_t *mask, const int sign,

                      const int ss_hor, const int ss_ver)

@@ -642,7 +642,7 @@

 #define w_mask_fns(ssn, ss_hor, ss_ver) \

 static void w_mask_##ssn##_c(pixel *const dst, const ptrdiff_t dst_stride, \

-                             const coef *const tmp1, const coef *const tmp2, \

+                             const int16_t *const tmp1, const int16_t *const tmp2, \

                              const int w, const int h, uint8_t *mask, \

                              const int sign) \

{ \

@@ -675,7 +675,7 @@

                               const pixel *src, const ptrdiff_t src_stride,

                               const int16_t *const abcd, int mx, int my)

-    coef mid[15 * 8], *mid_ptr = mid;

+    int16_t mid[15 * 8], *mid_ptr = mid;

     src -= 3 * PXSTRIDE(src_stride);

     for (int y = 0; y < 15; y++, mx += abcd[1]) {

@@ -702,11 +702,11 @@

-static void warp_affine_8x8t_c(coef *tmp, const ptrdiff_t tmp_stride,

+static void warp_affine_8x8t_c(int16_t *tmp, const ptrdiff_t tmp_stride,

                                const pixel *src, const ptrdiff_t src_stride,

                                const int16_t *const abcd, int mx, int my)

-    coef mid[15 * 8], *mid_ptr = mid;

+    int16_t mid[15 * 8], *mid_ptr = mid;

     src -= 3 * PXSTRIDE(src_stride);

     for (int y = 0; y < 15; y++, mx += abcd[1]) {

--- a/src/recon_tmpl.c

+++ b/src/recon_tmpl.c

@@ -493,7 +493,7 @@

 static int mc(Dav1dTileContext *const t,

-              pixel *const dst8, coef *const dst16, const ptrdiff_t dst_stride,

+              pixel *const dst8, int16_t *const dst16, const ptrdiff_t dst_stride,

               const int bw4, const int bh4,

               const int bx, const int by, const int pl,

               const mv mv, const Dav1dThreadPicture *const refp, const int refidx,

@@ -671,7 +671,7 @@

 static int warp_affine(Dav1dTileContext *const t,

-                       pixel *dst8, coef *dst16, const ptrdiff_t dstride,

+                       pixel *dst8, int16_t *dst16, const ptrdiff_t dstride,

                        const uint8_t *const b_dim, const int pl,

                        const Dav1dThreadPicture *const refp,

                        const Dav1dWarpedMotionParams *const wmp)

@@ -1357,7 +1357,7 @@

     } else {

         const enum Filter2d filter_2d = b->filter2d;

         // Maximum super block size is 128x128

-        coef (*tmp)[128 * 128] = (coef (*)[128 * 128]) t->scratch.compinter;

+        int16_t (*tmp)[128 * 128] = (int16_t (*)[128 * 128]) t->scratch.compinter;

         int jnt_weight;

         uint8_t *const seg_mask = t->scratch_seg_mask;

         const uint8_t *mask;

@@ -1372,8 +1372,6 @@

             } else {

                 res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,

                          b->mv[i], refp, b->ref[i], filter_2d);

-                if (DEBUG_BLOCK_INFO)

-                    coef_dump(tmp[i], bw4*4, bh4*4, 3, "med");

                 if (res) return res;

--- a/tests/checkasm/mc.c

+++ b/tests/checkasm/mc.c

@@ -80,14 +80,14 @@

 static void check_mct(Dav1dMCDSPContext *const c) {

     ALIGN_STK_32(pixel, src_buf, 135 * 135,);

-    ALIGN_STK_32(coef,  c_tmp,   128 * 128,);

-    ALIGN_STK_32(coef,  a_tmp,   128 * 128,);

+    ALIGN_STK_32(int16_t, c_tmp,   128 * 128,);

+    ALIGN_STK_32(int16_t, a_tmp,   128 * 128,);

     const pixel *src = src_buf + 135 * 3 + 3;

     for (int i = 0; i < 135 * 135; i++)

         src_buf[i] = rand();

-    declare_func(void, coef *tmp, const pixel *src, ptrdiff_t src_stride,

+    declare_func(void, int16_t *tmp, const pixel *src, ptrdiff_t src_stride,

                  int w, int h, int mx, int my);

     for (int filter = 0; filter < N_2D_FILTERS; filter++)

@@ -113,7 +113,7 @@

 static void init_tmp(Dav1dMCDSPContext *const c, pixel *const buf,

-                     coef (*const tmp)[128 * 128])

+                     int16_t (*const tmp)[128 * 128])

     for (int i = 0; i < 2; i++) {

         for (int j = 0; j < 135 * 135; j++)

@@ -125,14 +125,14 @@

 static void check_avg(Dav1dMCDSPContext *const c) {

-    ALIGN_STK_32(coef, tmp, 2, [128 * 128]);

+    ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);

     ALIGN_STK_32(pixel, c_dst, 135 * 135,);

     ALIGN_STK_32(pixel, a_dst, 128 * 128,);

     init_tmp(c, c_dst, tmp);

-    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,

-                 const coef *tmp2, int w, int h);

+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,

+                 const int16_t *tmp2, int w, int h);

     for (int w = 4; w <= 128; w <<= 1)

         if (check_func(c->avg, "avg_w%d_%dbpc", w, BITDEPTH))

@@ -149,14 +149,14 @@

 static void check_w_avg(Dav1dMCDSPContext *const c) {

-    ALIGN_STK_32(coef, tmp, 2, [128 * 128]);

+    ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);

     ALIGN_STK_32(pixel, c_dst, 135 * 135,);

     ALIGN_STK_32(pixel, a_dst, 128 * 128,);

     init_tmp(c, c_dst, tmp);

-    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,

-                 const coef *tmp2, int w, int h, int weight);

+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,

+                 const int16_t *tmp2, int w, int h, int weight);

     for (int w = 4; w <= 128; w <<= 1)

         if (check_func(c->w_avg, "w_avg_w%d_%dbpc", w, BITDEPTH))

@@ -175,7 +175,7 @@

 static void check_mask(Dav1dMCDSPContext *const c) {

-    ALIGN_STK_32(coef, tmp, 2, [128 * 128]);

+    ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);

     ALIGN_STK_32(pixel,   c_dst, 135 * 135,);

     ALIGN_STK_32(pixel,   a_dst, 128 * 128,);

     ALIGN_STK_32(uint8_t, mask,  128 * 128,);

@@ -184,8 +184,8 @@

     for (int i = 0; i < 128 * 128; i++)

         mask[i] = rand() % 65;

-    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,

-                 const coef *tmp2, int w, int h, const uint8_t *mask);

+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,

+                 const int16_t *tmp2, int w, int h, const uint8_t *mask);

     for (int w = 4; w <= 128; w <<= 1)

         if (check_func(c->mask, "mask_w%d_%dbpc", w, BITDEPTH))

@@ -202,7 +202,7 @@

 static void check_w_mask(Dav1dMCDSPContext *const c) {

-    ALIGN_STK_32(coef, tmp, 2, [128 * 128]);

+    ALIGN_STK_32(int16_t, tmp, 2, [128 * 128]);

     ALIGN_STK_32(pixel,   c_dst,  135 * 135,);

     ALIGN_STK_32(pixel,   a_dst,  128 * 128,);

     ALIGN_STK_32(uint8_t, c_mask, 128 * 128,);

@@ -210,8 +210,8 @@

     init_tmp(c, c_dst, tmp);

-    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const coef *tmp1,

-                 const coef *tmp2, int w, int h, uint8_t *mask, int sign);

+    declare_func(void, pixel *dst, ptrdiff_t dst_stride, const int16_t *tmp1,

+                 const int16_t *tmp2, int w, int h, uint8_t *mask, int sign);

     static const uint16_t ss[] = { 444, 422, 420 };

@@ -360,13 +360,13 @@

 static void check_warp8x8t(Dav1dMCDSPContext *const c) {

     ALIGN_STK_32(pixel, src_buf, 15 * 15,);

-    ALIGN_STK_32(coef,  c_tmp,    8 *  8,);

-    ALIGN_STK_32(coef,  a_tmp,    8 *  8,);

+    ALIGN_STK_32(int16_t, c_tmp,    8 *  8,);

+    ALIGN_STK_32(int16_t, a_tmp,    8 *  8,);

     int16_t abcd[4];

     const pixel *src = src_buf + 15 * 3 + 3;

     const ptrdiff_t src_stride = 15 * sizeof(pixel);

-    declare_func(void, coef *tmp, ptrdiff_t tmp_stride, const pixel *src,

+    declare_func(void, int16_t *tmp, ptrdiff_t tmp_stride, const pixel *src,

                  ptrdiff_t src_stride, const int16_t *abcd, int mx, int my);

     if (check_func(c->warp8x8t, "warp_8x8t_%dbpc", BITDEPTH)) {

--

⑨