shithub: dav1d

Download patch

ref: ec53ec6d5b4ebbb06d5c7209fd4f25634a6ad606
parent: e10b855c530a3a5ed519fcb3f3ec085a6e0aa79e
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Tue Nov 13 09:24:12 EST 2018

Add support for scaled references

Fixes #121.

--- a/include/common/intops.h
+++ b/include/common/intops.h
@@ -52,6 +52,10 @@
     return s < 0 ? -v : v;
 }
 
+static inline int apply_sign64(const int v, const int64_t s) { // sign source s is 64-bit, value v is int
+    return s < 0 ? -v : v; // negate v when s is negative; s == 0 leaves v unchanged
+}
+
 static inline int ulog2(const unsigned v) {
     return 31 - clz(v);
 }
--- a/src/decode.c
+++ b/src/decode.c
@@ -1720,7 +1720,8 @@
                 uint64_t mask[2] = { 0, 0 };
                 find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
                                   have_left, have_top, b->ref[0], mask);
-                const int allow_warp = !f->frame_hdr.force_integer_mv &&
+                const int allow_warp = !f->svc[b->ref[0]][0].scale &&
+                    !f->frame_hdr.force_integer_mv &&
                     f->frame_hdr.warp_motion && (mask[0] | mask[1]);
 
                 b->motion_mode = allow_warp ?
@@ -2938,8 +2939,10 @@
         for (int i = 0; i < 7; i++) {
             const int refidx = f->frame_hdr.refidx[i];
             if (!c->refs[refidx].p.p.data[0] ||
-                f->frame_hdr.width  != c->refs[refidx].p.p.p.w ||
-                f->frame_hdr.height != c->refs[refidx].p.p.p.h ||
+                f->frame_hdr.width * 2 < c->refs[refidx].p.p.p.w ||
+                f->frame_hdr.height * 2 < c->refs[refidx].p.p.p.h ||
+                f->frame_hdr.width > c->refs[refidx].p.p.p.w * 16 ||
+                f->frame_hdr.height > c->refs[refidx].p.p.p.h * 16 ||
                 f->seq_hdr.layout != c->refs[refidx].p.p.p.layout ||
                 f->seq_hdr.bpc != c->refs[refidx].p.p.p.bpc)
             {
@@ -2949,6 +2952,21 @@
                 goto error;
             }
             dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
+            if (f->frame_hdr.width  != c->refs[refidx].p.p.p.w ||
+                f->frame_hdr.height != c->refs[refidx].p.p.p.h)
+            {
+#define scale_fac(ref_sz, this_sz) \
+    (((ref_sz << 14) + (this_sz >> 1)) / this_sz)
+                f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
+                                               f->frame_hdr.width);
+                f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
+                                               f->frame_hdr.height);
+#undef scale_fac
+                f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
+                f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
+            } else {
+                f->svc[i][0].scale = 0;
+            }
         }
     }
 
--- a/src/internal.h
+++ b/src/internal.h
@@ -132,6 +132,12 @@
     } tile[256];
     int n_tile_data;
 
+    // for scalable references: one entry per reference (7) and per axis (x, y)
+    struct ScalableMotionParams {
+        int scale; // reference/current size ratio with 14 fractional bits; the x entry is 0 if no scaling
+        int step;  // scale reduced to 10 fractional bits, computed as (scale + 8) >> 4
+    } svc[7][2 /* x, y */];
+
     const Dav1dContext *c;
     Dav1dTileContext *tc;
     int n_tc;
@@ -244,7 +250,7 @@
     int bx, by;
     BlockContext l, *a;
     coef *cf;
-    pixel *emu_edge; // stride=160
+    pixel *emu_edge; // stride=192 for non-SVC, or 320 for SVC
     // FIXME types can be changed to pixel (and dynamically allocated)
     // which would make copy/assign operations slightly faster?
     uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
--- a/src/lib.c
+++ b/src/lib.c
@@ -117,7 +117,7 @@
             if (!t->scratch.mem) goto error;
             memset(t->cf, 0, 32 * 32 * sizeof(int32_t));
             t->emu_edge =
-                dav1d_alloc_aligned(192 * (128 + 7) * sizeof(uint16_t), 32);
+                dav1d_alloc_aligned(320 * (256 + 7) * sizeof(uint16_t), 32);
             if (!t->emu_edge) goto error;
             if (f->n_tc > 1) {
                 pthread_mutex_init(&t->tile_thread.td.lock, NULL);
--- a/src/mc.h
+++ b/src/mc.h
@@ -41,6 +41,12 @@
             int w, int h, int mx, int my)
 typedef decl_mc_fn(*mc_fn);
 
+#define decl_mc_scaled_fn(name) /* prototype of a put-style MC function that steps through src by dx/dy */ \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+            const pixel *src, ptrdiff_t src_stride, \
+            int w, int h, int mx, int my, int dx, int dy) // mx/my: start phase, dx/dy: per-output-sample step
+typedef decl_mc_scaled_fn(*mc_scaled_fn); // function-pointer type used for the mc_scaled[] table
+
 #define decl_warp8x8_fn(name) \
 void (name)(pixel *dst, ptrdiff_t dst_stride, \
             const pixel *src, ptrdiff_t src_stride, \
@@ -52,6 +58,11 @@
             int w, int h, int mx, int my)
 typedef decl_mct_fn(*mct_fn);
 
+#define decl_mct_scaled_fn(name) /* prototype of a prep-style MC function writing coef output to tmp */ \
+void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \
+            int w, int h, int mx, int my, int dx, int dy) // mx/my: start phase, dx/dy: per-output-sample step
+typedef decl_mct_scaled_fn(*mct_scaled_fn); // function-pointer type used for the mct_scaled[] table
+
 #define decl_warp8x8t_fn(name) \
 void (name)(coef *tmp, const ptrdiff_t tmp_stride, \
             const pixel *src, ptrdiff_t src_stride, \
@@ -96,7 +107,9 @@
 
 typedef struct Dav1dMCDSPContext {
     mc_fn mc[N_2D_FILTERS];
+    mc_scaled_fn mc_scaled[N_2D_FILTERS];
     mct_fn mct[N_2D_FILTERS];
+    mct_scaled_fn mct_scaled[N_2D_FILTERS];
     avg_fn avg;
     w_avg_fn w_avg;
     mask_fn mask;
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -78,13 +78,19 @@
 #define FILTER_8TAP_CLIP(src, x, F, stride, sh) \
     iclip_pixel(FILTER_8TAP_RND(src, x, F, stride, sh))
 
+#define GET_H_FILTER(mx) /* declares fh; needs w and filter_type in the expansion scope */ \
+    const int8_t *const fh = !(mx) ? NULL : w > 4 ? \
+        dav1d_mc_subpel_filters[filter_type & 3][(mx) - 1] : \
+        dav1d_mc_subpel_filters[3 + (filter_type & 1)][(mx) - 1] // fh is NULL for mx == 0; w <= 4 selects table row 3 or 4
+
+#define GET_V_FILTER(my) /* declares fv; needs h and filter_type in the expansion scope */ \
+    const int8_t *const fv = !(my) ? NULL : h > 4 ? \
+        dav1d_mc_subpel_filters[filter_type >> 2][(my) - 1] : \
+        dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][(my) - 1] // fv is NULL for my == 0; h <= 4 selects table row 3 or 4
+
 #define GET_FILTERS() \
-    const int8_t *const fh = !mx ? NULL : w > 4 ? \
-        dav1d_mc_subpel_filters[filter_type & 3][mx - 1] : \
-        dav1d_mc_subpel_filters[3 + (filter_type & 1)][mx - 1]; \
-    const int8_t *const fv = !my ? NULL : h > 4 ? \
-        dav1d_mc_subpel_filters[filter_type >> 2][my - 1] : \
-        dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][my - 1]; \
+    GET_H_FILTER(mx); \
+    GET_V_FILTER(my)
 
 static NOINLINE void
 put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
@@ -142,6 +148,48 @@
 }
 
 static NOINLINE void
+put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
+                  const pixel *src, const ptrdiff_t src_stride,
+                  const int w, int h, const int mx, int my,
+                  const int dx, const int dy, const int filter_type)
+{
+    // Scaled 8-tap put: a horizontal pass fills mid[] (row pitch 128), then a
+    // vertical pass writes clipped pixels to dst. mx/my are the start phases
+    // and dx/dy the per-output-sample steps, all with 10 fractional bits.
+    // Strides arrive in bytes and are converted with PXSTRIDE().
+    int tmp_h = (((h - 1) * dy + my) >> 10) + 8; // source rows needed, incl. 7 extra filter rows
+    coef mid[128 * (256 + 7)], *mid_ptr = mid;
+
+    src -= PXSTRIDE(src_stride) * 3; // fix: back up 3 rows in pixel units, not bytes
+    do {
+        int imx = mx, ioff = 0;
+        for (int x = 0; x < w; x++) {
+            GET_H_FILTER(imx >> 6); // top 4 fraction bits pick the filter phase
+            mid_ptr[x] = fh ? FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
+            imx += dx;
+            ioff += imx >> 10; // integer part advances the source column
+            imx &= 0x3ff;      // keep only the 10-bit fraction
+        }
+        mid_ptr += 128;
+        src += PXSTRIDE(src_stride);
+    } while (--tmp_h);
+
+    mid_ptr = mid + 128 * 3; // row 3 of mid[] corresponds to the caller's src row
+    for (int y = 0; y < h; y++) {
+        GET_V_FILTER(my >> 6);
+        for (int x = 0; x < w; x++)
+            // fix: clip the unfiltered path too; horizontally filtered values
+            // in mid[] may fall outside the pixel range (taps are signed)
+            dst[x] = fv ? FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10) :
+                          iclip_pixel((mid_ptr[x] + 8) >> 4);
+        my += dy;
+        mid_ptr += (my >> 10) * 128; // integer part advances the mid[] row
+        my &= 0x3ff;
+        dst += PXSTRIDE(dst_stride);
+    }
+}
+
+static NOINLINE void
 prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
             const int w, int h, const int mx, const int my,
             const int filter_type)
@@ -192,6 +240,46 @@
         prep_c(tmp, src, src_stride, w, h);
 }
 
+// Scaled 8-tap prep: a horizontal pass fills mid[] (row pitch 128), then a
+// vertical pass stores unclipped intermediates (4 extra fractional bits) in
+// tmp, packed at pitch w. mx/my and dx/dy carry 10 fractional bits.
+static NOINLINE void
+prep_8tap_scaled_c(coef *tmp, const pixel *src, const ptrdiff_t src_stride,
+                   const int w, int h, const int mx, int my,
+                   const int dx, const int dy, const int filter_type)
+{
+    int tmp_h = (((h - 1) * dy + my) >> 10) + 8; // source rows needed, incl. 7 extra filter rows
+    coef mid[128 * (256 + 7)], *mid_ptr = mid;
+
+    src -= PXSTRIDE(src_stride) * 3; // fix: back up 3 rows in pixel units, not bytes
+    do {
+        int imx = mx, ioff = 0;
+        for (int x = 0; x < w; x++) {
+            GET_H_FILTER(imx >> 6); // top 4 fraction bits pick the filter phase
+            mid_ptr[x] = fh ? FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
+            imx += dx;
+            ioff += imx >> 10; // integer part advances the source column
+            imx &= 0x3ff;      // keep only the 10-bit fraction
+        }
+        mid_ptr += 128;
+        src += PXSTRIDE(src_stride);
+    } while (--tmp_h);
+
+    mid_ptr = mid + 128 * 3; // row 3 of mid[] corresponds to the caller's src row
+    for (int y = 0; y < h; y++) {
+        GET_V_FILTER(my >> 6);
+        for (int x = 0; x < w; x++)
+            // fix: round only; clamping to the pixel range here would destroy
+            // the higher-precision intermediate that the no-filter branch keeps
+            tmp[x] = fv ? FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) : mid_ptr[x];
+
+        my += dy;
+        mid_ptr += (my >> 10) * 128; // integer part advances the mid[] row
+        my &= 0x3ff;
+        tmp += w;
+    }
+}
+
 #define filter_fns(type, type_h, type_v) \
 static void put_8tap_##type##_c(pixel *const dst, \
                                 const ptrdiff_t dst_stride, \
@@ -203,6 +291,17 @@
     put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \
                type_h | (type_v << 2)); \
 } \
+static void put_8tap_##type##_scaled_c(pixel *const dst, \
+                                       const ptrdiff_t dst_stride, \
+                                       const pixel *const src, \
+                                       const ptrdiff_t src_stride, \
+                                       const int w, const int h, \
+                                       const int mx, const int my, \
+                                       const int dx, const int dy) \
+{ \
+    put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
+                      type_h | (type_v << 2)); \
+} \
 static void prep_8tap_##type##_c(coef *const tmp, \
                                  const pixel *const src, \
                                  const ptrdiff_t src_stride, \
@@ -211,6 +310,16 @@
 { \
     prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \
                 type_h | (type_v << 2)); \
+} \
+static void prep_8tap_##type##_scaled_c(coef *const tmp, \
+                                        const pixel *const src, \
+                                        const ptrdiff_t src_stride, \
+                                        const int w, const int h, \
+                                        const int mx, const int my, \
+                                        const int dx, const int dy) \
+{ \
+    prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \
+                       type_h | (type_v << 2)); \
 }
 
 filter_fns(regular,        FILTER_8TAP_REGULAR, FILTER_8TAP_REGULAR)
@@ -281,6 +390,43 @@
         put_c(dst, dst_stride, src, src_stride, w, h);
 }
 
+static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride, // scaled bilinear put: horizontal pass, then vertical pass
+                               const pixel *src, ptrdiff_t src_stride, // strides are converted with PXSTRIDE() before use
+                               const int w, int h, const int mx, int my, // mx/my: start phase with 10 fractional bits
+                               const int dx, const int dy) // dx/dy: per-output-sample step, same units
+{
+    int tmp_h = (((h - 1) * dy + my) >> 10) + 2; // integer row offset of the last output row, plus 2
+    coef mid[128 * (256 + 1)], *mid_ptr = mid; // intermediate rows at a fixed pitch of 128
+    // Pass 1: interpolate tmp_h source rows horizontally into mid[].
+    do {
+        int x;
+        int imx = mx, ioff = 0;
+
+        for (x = 0; x < w; x++) {
+            mid_ptr[x] = FILTER_BILIN(src, ioff, imx >> 6, 1); // top 4 fraction bits are the filter position
+            imx += dx;
+            ioff += imx >> 10; // integer part advances the source column
+            imx &= 0x3ff; // keep only the 10-bit fraction
+        }
+
+        mid_ptr += 128;
+        src += PXSTRIDE(src_stride);
+    } while (--tmp_h);
+    // Pass 2: interpolate vertically between mid[] rows and write clipped pixels.
+    mid_ptr = mid;
+    do {
+        int x;
+
+        for (x = 0; x < w; x++)
+            dst[x] = FILTER_BILIN_CLIP(mid_ptr, x, my >> 6, 128, 8);
+
+        my += dy;
+        mid_ptr += (my >> 10) * 128; // integer part advances the mid[] row
+        my &= 0x3ff;
+        dst += PXSTRIDE(dst_stride);
+    } while (--h); // do/while: the body runs at least once, so h is expected to be >= 1
+}
+
 static void prep_bilin_c(coef *tmp,
                          const pixel *src, ptrdiff_t src_stride,
                          const int w, int h, const int mx, const int my)
@@ -329,6 +475,43 @@
         prep_c(tmp, src, src_stride, w, h);
 }
 
+static void prep_bilin_scaled_c(coef *tmp, // scaled bilinear prep: output rows are packed at pitch w in tmp
+                                const pixel *src, ptrdiff_t src_stride, // src_stride is converted with PXSTRIDE() before use
+                                const int w, int h, const int mx, int my, // mx/my: start phase with 10 fractional bits
+                                const int dx, const int dy) // dx/dy: per-output-sample step, same units
+{
+    int tmp_h = (((h - 1) * dy + my) >> 10) + 2; // integer row offset of the last output row, plus 2
+    coef mid[128 * (256 + 1)], *mid_ptr = mid; // intermediate rows at a fixed pitch of 128
+    // Pass 1: interpolate tmp_h source rows horizontally into mid[].
+    do {
+        int x;
+        int imx = mx, ioff = 0;
+
+        for (x = 0; x < w; x++) {
+            mid_ptr[x] = FILTER_BILIN(src, ioff, imx >> 6, 1); // top 4 fraction bits are the filter position
+            imx += dx;
+            ioff += imx >> 10; // integer part advances the source column
+            imx &= 0x3ff; // keep only the 10-bit fraction
+        }
+
+        mid_ptr += 128;
+        src += PXSTRIDE(src_stride);
+    } while (--tmp_h);
+    // Pass 2: interpolate vertically between mid[] rows; rounded, not clipped.
+    mid_ptr = mid;
+    do {
+        int x;
+
+        for (x = 0; x < w; x++)
+            tmp[x] = FILTER_BILIN_RND(mid_ptr, x, my >> 6, 128, 4);
+
+        my += dy;
+        mid_ptr += (my >> 10) * 128; // integer part advances the mid[] row
+        my &= 0x3ff;
+        tmp += w;
+    } while (--h); // do/while: the body runs at least once, so h is expected to be >= 1
+}
+
 static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
                   const coef *tmp1, const coef *tmp2, const int w, int h)
 {
@@ -599,8 +782,10 @@
 
 void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
 #define init_mc_fns(type, name) do { \
-    c->mc [type] = put_##name##_c; \
-    c->mct[type] = prep_##name##_c; \
+    c->mc        [type] = put_##name##_c; \
+    c->mc_scaled [type] = put_##name##_scaled_c; \
+    c->mct       [type] = prep_##name##_c; \
+    c->mct_scaled[type] = prep_##name##_scaled_c; \
 } while (0)
 
     init_mc_fns(FILTER_2D_8TAP_REGULAR,        8tap_regular);
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -496,7 +496,7 @@
               pixel *const dst8, coef *const dst16, const ptrdiff_t dst_stride,
               const int bw4, const int bh4,
               const int bx, const int by, const int pl,
-              const mv mv, const Dav1dThreadPicture *const refp,
+              const mv mv, const Dav1dThreadPicture *const refp, const int refidx,
               const enum Filter2d filter_2d)
 {
     assert((dst8 != NULL) ^ (dst16 != NULL));
@@ -506,45 +506,98 @@
     const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
     const int mvx = mv.x, mvy = mv.y;
     const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
-    const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
-    const int dy = by * v_mul + (mvy >> (3 + ss_ver));
     ptrdiff_t ref_stride = refp->p.stride[!!pl];
     const pixel *ref;
-    int w, h;
 
-    if (refp != &f->cur) { // i.e. not for intrabc
-        if (dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,
-                                      PLANE_TYPE_Y + !!pl))
+    if (refp->p.p.w == f->cur.p.p.w && refp->p.p.h == f->cur.p.p.h) {
+        const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
+        const int dy = by * v_mul + (mvy >> (3 + ss_ver));
+        int w, h;
+
+        if (refp != &f->cur) { // i.e. not for intrabc
+            if (dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,
+                                          PLANE_TYPE_Y + !!pl))
+            {
+                return -1;
+            }
+            w = (f->cur.p.p.w + ss_hor) >> ss_hor;
+            h = (f->cur.p.p.h + ss_ver) >> ss_ver;
+        } else {
+            w = f->bw * 4 >> ss_hor;
+            h = f->bh * 4 >> ss_ver;
+        }
+        if (dx < !!mx * 3 || dy < !!my * 3 ||
+            dx + bw4 * h_mul + !!mx * 4 > w ||
+            dy + bh4 * v_mul + !!my * 4 > h)
         {
-            return -1;
+            f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
+                                w, h, dx - !!mx * 3, dy - !!my * 3,
+                                t->emu_edge, 192 * sizeof(pixel),
+                                refp->p.data[pl], ref_stride);
+            ref = &t->emu_edge[192 * !!my * 3 + !!mx * 3];
+            ref_stride = 192 * sizeof(pixel);
+        } else {
+            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
         }
-        w = (f->cur.p.p.w + ss_hor) >> ss_hor;
-        h = (f->cur.p.p.h + ss_ver) >> ss_ver;
-    } else {
-        w = f->bw * 4 >> ss_hor;
-        h = f->bh * 4 >> ss_ver;
-    }
-    if (dx < !!mx * 3 || dy < !!my * 3 ||
-        dx + bw4 * h_mul + !!mx * 4 > w ||
-        dy + bh4 * v_mul + !!my * 4 > h)
-    {
-        f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
-                            w, h, dx - !!mx * 3, dy - !!my * 3,
-                            t->emu_edge, 192 * sizeof(pixel),
-                            refp->p.data[pl], ref_stride);
-        ref = &t->emu_edge[192 * !!my * 3 + !!mx * 3];
-        ref_stride = 192 * sizeof(pixel);
-    } else {
-        ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
-    }
 
-    if (dst8 != NULL) {
-        f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
-                                 bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+        if (dst8 != NULL) {
+            f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
+                                     bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+        } else {
+            f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
+                                      bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+        }
     } else {
-        f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
-                                  bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+        assert(refp != &f->cur);
+
+        int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
+        int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
+#define scale_mv(res, val, scale) do { \
+            const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
+            res = (int)apply_sign64((llabs(tmp) + 128) >> 8, tmp) + 32; \
+        } while (0)
+        int pos_y, pos_x;
+        scale_mv(pos_x, orig_pos_x, f->svc[refidx][0].scale);
+        scale_mv(pos_y, orig_pos_y, f->svc[refidx][1].scale);
+#undef scale_mv
+        const int left = pos_x >> 10;
+        const int top = pos_y >> 10;
+        const int right =
+            ((pos_x + (bw4 * h_mul - 1) * f->svc[refidx][0].step) >> 10) + 1;
+        const int bottom =
+            ((pos_y + (bh4 * v_mul - 1) * f->svc[refidx][1].step) >> 10) + 1;
+
+        if (dav1d_thread_picture_wait(refp, bottom, PLANE_TYPE_Y + !!pl))
+            return -1;
+
+        const int w = (refp->p.p.w + ss_hor) >> ss_hor;
+        const int h = (refp->p.p.h + ss_ver) >> ss_ver;
+        if (left < 3 || top < 3 || right + 4 > w || bottom + 4 > h) {
+            f->dsp->mc.emu_edge(right - left + 7, bottom - top + 7,
+                                w, h, left - 3, top - 3,
+                                t->emu_edge, 320 * sizeof(pixel),
+                                refp->p.data[pl], ref_stride);
+            ref = &t->emu_edge[320 * 3 + 3];
+            ref_stride = 320 * sizeof(pixel);
+        } else {
+            ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
+        }
+
+        if (dst8 != NULL) {
+            f->dsp->mc.mc_scaled[filter_2d](dst8, dst_stride, ref, ref_stride,
+                                            bw4 * h_mul, bh4 * v_mul,
+                                            pos_x & 0x3ff, pos_y & 0x3ff,
+                                            f->svc[refidx][0].step,
+                                            f->svc[refidx][1].step);
+        } else {
+            f->dsp->mc.mct_scaled[filter_2d](dst16, ref, ref_stride,
+                                             bw4 * h_mul, bh4 * v_mul,
+                                             pos_x & 0x3ff, pos_y & 0x3ff,
+                                             f->svc[refidx][0].step,
+                                             f->svc[refidx][1].step);
+        }
     }
+
     return 0;
 }
 
@@ -576,7 +629,7 @@
                 const int oh4 = imin(b_dim[1], 16) >> 1;
                 res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, oh4,
                          t->bx + x, t->by, pl, a_r->mv[0],
-                         &f->refp[a_r->ref[0] - 1],
+                         &f->refp[a_r->ref[0] - 1], a_r->ref[0] - 1,
                          dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
                 if (res) return res;
                 f->dsp->mc.blend_h(&dst[x * h_mul], dst_stride, lap,
@@ -599,7 +652,7 @@
                 const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
                 res = mc(t, lap, NULL, h_mul * ow4 * sizeof(pixel), ow4, oh4,
                          t->bx, t->by + y, pl, l_r->mv[0],
-                         &f->refp[l_r->ref[0] - 1],
+                         &f->refp[l_r->ref[0] - 1], l_r->ref[0] - 1,
                          dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
                 if (res) return res;
                 f->dsp->mc.blend_v(&dst[y * v_mul * PXSTRIDE(dst_stride)],
@@ -1091,13 +1144,13 @@
     if (!(f->frame_hdr.frame_type & 1)) {
         // intrabc
         res = mc(t, dst, NULL, f->cur.p.stride[0],
-                 bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, FILTER_2D_BILINEAR);
+                 bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
         if (res) return res;
         if (has_chroma) for (int pl = 1; pl < 3; pl++) {
             res = mc(t, ((pixel *)f->cur.p.data[pl]) + uvdstoff, NULL, f->cur.p.stride[1],
                      bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
                      t->bx & ~ss_hor, t->by & ~ss_ver,
-                     pl, b->mv[0], &f->cur, FILTER_2D_BILINEAR);
+                     pl, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
             if (res) return res;
         }
     } else if (b->comp_type == COMP_INTER_NONE) {
@@ -1116,7 +1169,7 @@
             if (res) return res;
         } else {
             res = mc(t, dst, NULL, f->cur.p.stride[0],
-                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, filter_2d);
+                     bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
             if (res) return res;
             if (b->motion_mode == MM_OBMC) {
                 res = obmc(t, dst, f->cur.p.stride[0], b_dim, 0, bx4, by4, w4, h4);
@@ -1176,6 +1229,7 @@
                              bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
                              r[-(f->b4_stride + 1)].mv[0],
                              &f->refp[r[-(f->b4_stride + 1)].ref[0] - 1],
+                             r[-(f->b4_stride + 1)].ref[0] - 1,
                              f->frame_thread.pass != 2 ? t->tl_4x4_filter :
                                  f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
                     if (res) return res;
@@ -1190,6 +1244,7 @@
                     res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + v_off, NULL,
                              f->cur.p.stride[1], bw4, bh4, t->bx - 1,
                              t->by, 1 + pl, r[-1].mv[0], &f->refp[r[-1].ref[0] - 1],
+                             r[-1].ref[0] - 1,
                              f->frame_thread.pass != 2 ? left_filter_2d :
                                  f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
                     if (res) return res;
@@ -1204,6 +1259,7 @@
                              f->cur.p.stride[1], bw4, bh4, t->bx, t->by - 1,
                              1 + pl, r[-f->b4_stride].mv[0],
                              &f->refp[r[-f->b4_stride].ref[0] - 1],
+                             r[-f->b4_stride].ref[0] - 1,
                              f->frame_thread.pass != 2 ? top_filter_2d :
                                  f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
                     if (res) return res;
@@ -1212,7 +1268,8 @@
             }
             for (int pl = 0; pl < 2; pl++) {
                 res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.p.stride[1],
-                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0], refp, filter_2d);
+                         bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
+                         refp, b->ref[0], filter_2d);
                 if (res) return res;
             }
         } else {
@@ -1235,7 +1292,7 @@
                              NULL, f->cur.p.stride[1],
                              bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
                              t->bx & ~ss_hor, t->by & ~ss_ver,
-                             1 + pl, b->mv[0], refp, filter_2d);
+                             1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
                     if (res) return res;
                     if (b->motion_mode == MM_OBMC) {
                         res = obmc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
@@ -1307,7 +1364,7 @@
                 if (res) return res;
             } else {
                 res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
-                         b->mv[i], refp, filter_2d);
+                         b->mv[i], refp, b->ref[i], filter_2d);
                 if (res) return res;
             }
         }
@@ -1350,7 +1407,7 @@
                     if (res) return res;
                 } else {
                     res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
-                             1 + pl, b->mv[i], refp, filter_2d);
+                             1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
                     if (res) return res;
                 }
             }
--- a/src/warpmv.c
+++ b/src/warpmv.c
@@ -67,10 +67,6 @@
     return apply_sign((abs(cv) + 32) >> 6, cv) * (1 << 6);
 }
 
-static inline int apply_sign64(const int v, const int64_t s) {
-    return s < 0 ? -v : v;
-}
-
 static inline int resolve_divisor_32(const unsigned d, int *const shift) {
     *shift = ulog2(d);
     const int e = d - (1 << *shift);