ref: ec53ec6d5b4ebbb06d5c7209fd4f25634a6ad606
parent: e10b855c530a3a5ed519fcb3f3ec085a6e0aa79e
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Tue Nov 13 09:24:12 EST 2018
Add support for scaled references. Fixes #121.
--- a/include/common/intops.h
+++ b/include/common/intops.h
@@ -52,6 +52,10 @@
return s < 0 ? -v : v;
}
+/* Gives v the sign of the 64-bit value s: -v when s is negative, else v. */
+static inline int apply_sign64(const int v, const int64_t s) {
+    if (s < 0)
+        return -v;
+    return v;
+}
+
static inline int ulog2(const unsigned v) {
return 31 - clz(v);
}
--- a/src/decode.c
+++ b/src/decode.c
@@ -1720,7 +1720,8 @@
uint64_t mask[2] = { 0, 0 };
find_matching_ref(t, intra_edge_flags, bw4, bh4, w4, h4,
have_left, have_top, b->ref[0], mask);
- const int allow_warp = !f->frame_hdr.force_integer_mv &&
+ const int allow_warp = !f->svc[b->ref[0]][0].scale &&
+ !f->frame_hdr.force_integer_mv &&
f->frame_hdr.warp_motion && (mask[0] | mask[1]);
b->motion_mode = allow_warp ?
@@ -2938,8 +2939,10 @@
for (int i = 0; i < 7; i++) {
const int refidx = f->frame_hdr.refidx[i];
if (!c->refs[refidx].p.p.data[0] ||
- f->frame_hdr.width != c->refs[refidx].p.p.p.w ||
- f->frame_hdr.height != c->refs[refidx].p.p.p.h ||
+ f->frame_hdr.width * 2 < c->refs[refidx].p.p.p.w ||
+ f->frame_hdr.height * 2 < c->refs[refidx].p.p.p.h ||
+ f->frame_hdr.width > c->refs[refidx].p.p.p.w * 16 ||
+ f->frame_hdr.height > c->refs[refidx].p.p.p.h * 16 ||
f->seq_hdr.layout != c->refs[refidx].p.p.p.layout ||
f->seq_hdr.bpc != c->refs[refidx].p.p.p.bpc)
{
@@ -2949,6 +2952,21 @@
goto error;
}
dav1d_thread_picture_ref(&f->refp[i], &c->refs[refidx].p);
+ if (f->frame_hdr.width != c->refs[refidx].p.p.p.w ||
+ f->frame_hdr.height != c->refs[refidx].p.p.p.h)
+ {
+#define scale_fac(ref_sz, this_sz) \
+ (((ref_sz << 14) + (this_sz >> 1)) / this_sz)
+ f->svc[i][0].scale = scale_fac(c->refs[refidx].p.p.p.w,
+ f->frame_hdr.width);
+ f->svc[i][1].scale = scale_fac(c->refs[refidx].p.p.p.h,
+ f->frame_hdr.height);
+#undef scale_fac
+ f->svc[i][0].step = (f->svc[i][0].scale + 8) >> 4;
+ f->svc[i][1].step = (f->svc[i][1].scale + 8) >> 4;
+ } else {
+ f->svc[i][0].scale = 0;
+ }
}
}
--- a/src/internal.h
+++ b/src/internal.h
@@ -132,6 +132,12 @@
} tile[256];
int n_tile_data;
+ // Scaling parameters for references whose dimensions differ from the
+ // current frame's. One entry per reference slot (7) and per axis (x, y).
+ struct ScalableMotionParams {
+ // Reference size over current-frame size in 14-bit fixed point, rounded
+ // to nearest: ((ref_sz << 14) + (this_sz >> 1)) / this_sz.
+ // 0 means "no scaling"; only the x entry is reset to 0 in that case, so
+ // the x entry is the one to test.
+ int scale; // if no scaling, this is 0
+ // (scale + 8) >> 4, i.e. scale rounded to 10 fractional bits. Passed to
+ // the scaled MC functions as the per-output-sample step (dx / dy).
+ int step;
+ } svc[7][2 /* x, y */];
+
const Dav1dContext *c;
Dav1dTileContext *tc;
int n_tc;
@@ -244,7 +250,7 @@
int bx, by;
BlockContext l, *a;
coef *cf;
- pixel *emu_edge; // stride=160
+ pixel *emu_edge; // stride=192 for non-SVC, or 320 for SVC
// FIXME types can be changed to pixel (and dynamically allocated)
// which would make copy/assign operations slightly faster?
uint16_t al_pal[2 /* a/l */][32 /* bx/y4 */][3 /* plane */][8 /* palette_idx */];
--- a/src/lib.c
+++ b/src/lib.c
@@ -117,7 +117,7 @@
if (!t->scratch.mem) goto error;
memset(t->cf, 0, 32 * 32 * sizeof(int32_t));
t->emu_edge =
- dav1d_alloc_aligned(192 * (128 + 7) * sizeof(uint16_t), 32);
+ dav1d_alloc_aligned(320 * (256 + 7) * sizeof(uint16_t), 32);
if (!t->emu_edge) goto error;
if (f->n_tc > 1) {
pthread_mutex_init(&t->tile_thread.td.lock, NULL);
--- a/src/mc.h
+++ b/src/mc.h
@@ -41,6 +41,12 @@
int w, int h, int mx, int my)
typedef decl_mc_fn(*mc_fn);
+// Prototype of a "put" motion-compensation function for scaled references.
+// It writes a w x h block of pixels to dst, sampling src at a position that
+// advances by dx (horizontally) / dy (vertically) per output sample.
+// mx/my are the starting sub-sample positions; callers pass them with 10
+// fractional bits (pos & 0x3ff), and dx/dy use the same 1/1024 units.
+#define decl_mc_scaled_fn(name) \
+void (name)(pixel *dst, ptrdiff_t dst_stride, \
+ const pixel *src, ptrdiff_t src_stride, \
+ int w, int h, int mx, int my, int dx, int dy)
+typedef decl_mc_scaled_fn(*mc_scaled_fn);
+
#define decl_warp8x8_fn(name) \
void (name)(pixel *dst, ptrdiff_t dst_stride, \
const pixel *src, ptrdiff_t src_stride, \
@@ -52,6 +58,11 @@
int w, int h, int mx, int my)
typedef decl_mct_fn(*mct_fn);
+// Prototype of a "prep" motion-compensation function for scaled references.
+// Same sampling parameters as decl_mc_scaled_fn (mx/my start position and
+// dx/dy step, all with 10 fractional bits), but the result is written as
+// intermediate coefficients to tmp, packed w entries per row.
+#define decl_mct_scaled_fn(name) \
+void (name)(coef *tmp, const pixel *src, ptrdiff_t src_stride, \
+ int w, int h, int mx, int my, int dx, int dy)
+typedef decl_mct_scaled_fn(*mct_scaled_fn);
+
#define decl_warp8x8t_fn(name) \
void (name)(coef *tmp, const ptrdiff_t tmp_stride, \
const pixel *src, ptrdiff_t src_stride, \
@@ -96,7 +107,9 @@
typedef struct Dav1dMCDSPContext {
mc_fn mc[N_2D_FILTERS];
+ mc_scaled_fn mc_scaled[N_2D_FILTERS];
mct_fn mct[N_2D_FILTERS];
+ mct_scaled_fn mct_scaled[N_2D_FILTERS];
avg_fn avg;
w_avg_fn w_avg;
mask_fn mask;
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -78,13 +78,19 @@
#define FILTER_8TAP_CLIP(src, x, F, stride, sh) \
iclip_pixel(FILTER_8TAP_RND(src, x, F, stride, sh))
+// Declares `fh`, the horizontal 8-tap filter for sub-sample phase `mx`.
+// Phase 0 yields NULL (no filtering); otherwise the phase is a 1-based index
+// into dav1d_mc_subpel_filters. Reads `w` and `filter_type` from the
+// enclosing scope: blocks with w <= 4 use table rows 3 + (filter_type & 1)
+// instead of row (filter_type & 3).
+#define GET_H_FILTER(mx) \
+ const int8_t *const fh = !(mx) ? NULL : w > 4 ? \
+ dav1d_mc_subpel_filters[filter_type & 3][(mx) - 1] : \
+ dav1d_mc_subpel_filters[3 + (filter_type & 1)][(mx) - 1]
+
+// Declares `fv`, the vertical counterpart of GET_H_FILTER. Reads `h` and
+// `filter_type` from the enclosing scope; the vertical filter type lives in
+// the bits above the low two (filter_type >> 2), and blocks with h <= 4 use
+// table rows 3 + ((filter_type >> 2) & 1).
+#define GET_V_FILTER(my) \
+ const int8_t *const fv = !(my) ? NULL : h > 4 ? \
+ dav1d_mc_subpel_filters[filter_type >> 2][(my) - 1] : \
+ dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][(my) - 1]
+
#define GET_FILTERS() \
- const int8_t *const fh = !mx ? NULL : w > 4 ? \
- dav1d_mc_subpel_filters[filter_type & 3][mx - 1] : \
- dav1d_mc_subpel_filters[3 + (filter_type & 1)][mx - 1]; \
- const int8_t *const fv = !my ? NULL : h > 4 ? \
- dav1d_mc_subpel_filters[filter_type >> 2][my - 1] : \
- dav1d_mc_subpel_filters[3 + ((filter_type >> 2) & 1)][my - 1]; \
+ GET_H_FILTER(mx); \
+ GET_V_FILTER(my)
static NOINLINE void
put_8tap_c(pixel *dst, ptrdiff_t dst_stride,
@@ -142,6 +148,48 @@
}
+/*
+ * 8-tap "put" for a scaled reference: resamples a w x h block from src and
+ * stores it as pixels in dst.
+ *
+ * mx/my are the starting sub-sample positions and dx/dy the per-output-sample
+ * steps, all with 10 fractional bits. The top 4 fraction bits (pos >> 6)
+ * select the filter phase; phase 0 means "no filter". Strides are byte
+ * strides (callers pass e.g. 320 * sizeof(pixel)) and must go through
+ * PXSTRIDE() before being used as pixel-pointer offsets.
+ *
+ * Pass 1 filters horizontally into mid[] (128 entries per row), starting
+ * three rows above the block and covering every row the vertical taps can
+ * reach; the result keeps 4 extra bits of precision. Pass 2 filters
+ * vertically, rounds those bits away and clips to the pixel range.
+ */
static NOINLINE void
+put_8tap_scaled_c(pixel *dst, const ptrdiff_t dst_stride,
+                  const pixel *src, const ptrdiff_t src_stride,
+                  const int w, int h, const int mx, int my,
+                  const int dx, const int dy, const int filter_type)
+{
+    int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
+    coef mid[128 * (256 + 7)], *mid_ptr = mid;
+
+    // Rewind three rows in pixel units. Using the raw byte stride here
+    // would rewind too far whenever sizeof(pixel) > 1.
+    src -= PXSTRIDE(src_stride) * 3;
+    do {
+        int x;
+        int imx = mx, ioff = 0;
+
+        for (x = 0; x < w; x++) {
+            GET_H_FILTER(imx >> 6);
+            mid_ptr[x] = fh ? FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
+            // Advance the source position: integer part moves the column,
+            // the 10-bit fraction is carried to the next sample.
+            imx += dx;
+            ioff += imx >> 10;
+            imx &= 0x3ff;
+        }
+
+        mid_ptr += 128;
+        src += PXSTRIDE(src_stride);
+    } while (--tmp_h);
+
+    mid_ptr = mid + 128 * 3;
+    for (int y = 0; y < h; y++) {
+        int x;
+        GET_V_FILTER(my >> 6);
+
+        // The horizontal taps are signed, so mid[] may overshoot the pixel
+        // range; the unfiltered path has to clip just like the filtered one.
+        for (x = 0; x < w; x++)
+            dst[x] = fv ? FILTER_8TAP_CLIP(mid_ptr, x, fv, 128, 10) :
+                          iclip_pixel((mid_ptr[x] + 8) >> 4);
+
+        my += dy;
+        mid_ptr += (my >> 10) * 128;
+        my &= 0x3ff;
+        dst += PXSTRIDE(dst_stride);
+    }
+}
+
+static NOINLINE void
prep_8tap_c(coef *tmp, const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my,
const int filter_type)
@@ -192,6 +240,46 @@
prep_c(tmp, src, src_stride, w, h);
}
+/*
+ * 8-tap "prep" for a scaled reference: resamples a w x h block from src and
+ * stores it as intermediate coefficients in tmp (w entries per row).
+ *
+ * mx/my are the starting sub-sample positions and dx/dy the per-output-sample
+ * steps, all with 10 fractional bits. The top 4 fraction bits (pos >> 6)
+ * select the filter phase; phase 0 means "no filter". src_stride is a byte
+ * stride and must go through PXSTRIDE() before being used as a pixel-pointer
+ * offset.
+ *
+ * Pass 1 filters horizontally into mid[] (128 entries per row), starting
+ * three rows above the block; the result keeps 4 extra bits of precision.
+ * Pass 2 filters vertically and keeps that precision: the output is an
+ * intermediate, so it is rounded but never clipped to the pixel range.
+ */
+static NOINLINE void
+prep_8tap_scaled_c(coef *tmp, const pixel *src, const ptrdiff_t src_stride,
+                   const int w, int h, const int mx, int my,
+                   const int dx, const int dy, const int filter_type)
+{
+    int tmp_h = (((h - 1) * dy + my) >> 10) + 8;
+    coef mid[128 * (256 + 7)], *mid_ptr = mid;
+
+    // Rewind three rows in pixel units. Using the raw byte stride here
+    // would rewind too far whenever sizeof(pixel) > 1.
+    src -= PXSTRIDE(src_stride) * 3;
+    do {
+        int x;
+        int imx = mx, ioff = 0;
+
+        for (x = 0; x < w; x++) {
+            GET_H_FILTER(imx >> 6);
+            mid_ptr[x] = fh ? FILTER_8TAP_RND(src, ioff, fh, 1, 2) : src[ioff] << 4;
+            // Advance the source position: integer part moves the column,
+            // the 10-bit fraction is carried to the next sample.
+            imx += dx;
+            ioff += imx >> 10;
+            imx &= 0x3ff;
+        }
+
+        mid_ptr += 128;
+        src += PXSTRIDE(src_stride);
+    } while (--tmp_h);
+
+    mid_ptr = mid + 128 * 3;
+    for (int y = 0; y < h; y++) {
+        int x;
+        GET_V_FILTER(my >> 6);
+
+        // Round only: clipping to the pixel range here would destroy the
+        // extra precision that the unfiltered branch (mid_ptr[x]) keeps.
+        for (x = 0; x < w; x++)
+            tmp[x] = fv ? FILTER_8TAP_RND(mid_ptr, x, fv, 128, 6) : mid_ptr[x];
+
+        my += dy;
+        mid_ptr += (my >> 10) * 128;
+        my &= 0x3ff;
+        tmp += w;
+    }
+}
+
#define filter_fns(type, type_h, type_v) \
static void put_8tap_##type##_c(pixel *const dst, \
const ptrdiff_t dst_stride, \
@@ -203,6 +291,17 @@
put_8tap_c(dst, dst_stride, src, src_stride, w, h, mx, my, \
type_h | (type_v << 2)); \
} \
+static void put_8tap_##type##_scaled_c(pixel *const dst, \
+ const ptrdiff_t dst_stride, \
+ const pixel *const src, \
+ const ptrdiff_t src_stride, \
+ const int w, const int h, \
+ const int mx, const int my, \
+ const int dx, const int dy) \
+{ \
+ put_8tap_scaled_c(dst, dst_stride, src, src_stride, w, h, mx, my, dx, dy, \
+ type_h | (type_v << 2)); \
+} \
static void prep_8tap_##type##_c(coef *const tmp, \
const pixel *const src, \
const ptrdiff_t src_stride, \
@@ -211,6 +310,16 @@
{ \
prep_8tap_c(tmp, src, src_stride, w, h, mx, my, \
type_h | (type_v << 2)); \
+} \
+static void prep_8tap_##type##_scaled_c(coef *const tmp, \
+ const pixel *const src, \
+ const ptrdiff_t src_stride, \
+ const int w, const int h, \
+ const int mx, const int my, \
+ const int dx, const int dy) \
+{ \
+ prep_8tap_scaled_c(tmp, src, src_stride, w, h, mx, my, dx, dy, \
+ type_h | (type_v << 2)); \
}
filter_fns(regular, FILTER_8TAP_REGULAR, FILTER_8TAP_REGULAR)
@@ -281,6 +390,43 @@
put_c(dst, dst_stride, src, src_stride, w, h);
}
+/*
+ * Bilinear "put" for a scaled reference: resamples a w x h block from src
+ * and stores it as pixels in dst.
+ *
+ * mx/my are the starting sub-sample positions and dx/dy the per-output-sample
+ * steps, all with 10 fractional bits; the top 4 fraction bits (pos >> 6) are
+ * the bilinear weight. Pass 1 blends horizontally into mid[] (128 entries per
+ * row) for every source row the vertical pass reads; pass 2 blends
+ * vertically through FILTER_BILIN_CLIP and writes the pixels.
+ */
+static void put_bilin_scaled_c(pixel *dst, ptrdiff_t dst_stride,
+                               const pixel *src, ptrdiff_t src_stride,
+                               const int w, int h, const int mx, int my,
+                               const int dx, const int dy)
+{
+    coef mid[128 * (256 + 1)];
+    coef *row = mid;
+    int rows_left = (((h - 1) * dy + my) >> 10) + 2;
+
+    // Horizontal pass: one mid[] row per source row.
+    do {
+        int frac = mx, col = 0;
+
+        for (int x = 0; x < w; x++) {
+            row[x] = FILTER_BILIN(src, col, frac >> 6, 1);
+            // Integer part of the step moves the column, the 10-bit
+            // fraction carries over to the next sample.
+            frac += dx;
+            col += frac >> 10;
+            frac &= 0x3ff;
+        }
+
+        src += PXSTRIDE(src_stride);
+        row += 128;
+    } while (--rows_left);
+
+    // Vertical pass: walk mid[] with the dy step and emit pixels.
+    row = mid;
+    do {
+        for (int x = 0; x < w; x++)
+            dst[x] = FILTER_BILIN_CLIP(row, x, my >> 6, 128, 8);
+
+        dst += PXSTRIDE(dst_stride);
+        my += dy;
+        row += (my >> 10) * 128;
+        my &= 0x3ff;
+    } while (--h);
+}
+
static void prep_bilin_c(coef *tmp,
const pixel *src, ptrdiff_t src_stride,
const int w, int h, const int mx, const int my)
@@ -329,6 +475,43 @@
prep_c(tmp, src, src_stride, w, h);
}
+/*
+ * Bilinear "prep" for a scaled reference: resamples a w x h block from src
+ * and stores it as intermediate coefficients in tmp (w entries per row).
+ *
+ * mx/my are the starting sub-sample positions and dx/dy the per-output-sample
+ * steps, all with 10 fractional bits; the top 4 fraction bits (pos >> 6) are
+ * the bilinear weight. Pass 1 blends horizontally into mid[] (128 entries per
+ * row) for every source row the vertical pass reads; pass 2 blends
+ * vertically through FILTER_BILIN_RND, without clipping.
+ */
+static void prep_bilin_scaled_c(coef *tmp,
+                                const pixel *src, ptrdiff_t src_stride,
+                                const int w, int h, const int mx, int my,
+                                const int dx, const int dy)
+{
+    coef mid[128 * (256 + 1)];
+    coef *row = mid;
+    int rows_left = (((h - 1) * dy + my) >> 10) + 2;
+
+    // Horizontal pass: one mid[] row per source row.
+    do {
+        int frac = mx, col = 0;
+
+        for (int x = 0; x < w; x++) {
+            row[x] = FILTER_BILIN(src, col, frac >> 6, 1);
+            // Integer part of the step moves the column, the 10-bit
+            // fraction carries over to the next sample.
+            frac += dx;
+            col += frac >> 10;
+            frac &= 0x3ff;
+        }
+
+        src += PXSTRIDE(src_stride);
+        row += 128;
+    } while (--rows_left);
+
+    // Vertical pass: walk mid[] with the dy step and emit coefficients.
+    row = mid;
+    do {
+        for (int x = 0; x < w; x++)
+            tmp[x] = FILTER_BILIN_RND(row, x, my >> 6, 128, 4);
+
+        tmp += w;
+        my += dy;
+        row += (my >> 10) * 128;
+        my &= 0x3ff;
+    } while (--h);
+}
+
static void avg_c(pixel *dst, const ptrdiff_t dst_stride,
const coef *tmp1, const coef *tmp2, const int w, int h)
{
@@ -599,8 +782,10 @@
void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
#define init_mc_fns(type, name) do { \
- c->mc [type] = put_##name##_c; \
- c->mct[type] = prep_##name##_c; \
+ c->mc [type] = put_##name##_c; \
+ c->mc_scaled [type] = put_##name##_scaled_c; \
+ c->mct [type] = prep_##name##_c; \
+ c->mct_scaled[type] = prep_##name##_scaled_c; \
} while (0)
init_mc_fns(FILTER_2D_8TAP_REGULAR, 8tap_regular);
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -496,7 +496,7 @@
pixel *const dst8, coef *const dst16, const ptrdiff_t dst_stride,
const int bw4, const int bh4,
const int bx, const int by, const int pl,
- const mv mv, const Dav1dThreadPicture *const refp,
+ const mv mv, const Dav1dThreadPicture *const refp, const int refidx,
const enum Filter2d filter_2d)
{
assert((dst8 != NULL) ^ (dst16 != NULL));
@@ -506,45 +506,98 @@
const int h_mul = 4 >> ss_hor, v_mul = 4 >> ss_ver;
const int mvx = mv.x, mvy = mv.y;
const int mx = mvx & (15 >> !ss_hor), my = mvy & (15 >> !ss_ver);
- const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
- const int dy = by * v_mul + (mvy >> (3 + ss_ver));
ptrdiff_t ref_stride = refp->p.stride[!!pl];
const pixel *ref;
- int w, h;
- if (refp != &f->cur) { // i.e. not for intrabc
- if (dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,
- PLANE_TYPE_Y + !!pl))
+ if (refp->p.p.w == f->cur.p.p.w && refp->p.p.h == f->cur.p.p.h) {
+ const int dx = bx * h_mul + (mvx >> (3 + ss_hor));
+ const int dy = by * v_mul + (mvy >> (3 + ss_ver));
+ int w, h;
+
+ if (refp != &f->cur) { // i.e. not for intrabc
+ if (dav1d_thread_picture_wait(refp, dy + bh4 * v_mul + !!my * 4,
+ PLANE_TYPE_Y + !!pl))
+ {
+ return -1;
+ }
+ w = (f->cur.p.p.w + ss_hor) >> ss_hor;
+ h = (f->cur.p.p.h + ss_ver) >> ss_ver;
+ } else {
+ w = f->bw * 4 >> ss_hor;
+ h = f->bh * 4 >> ss_ver;
+ }
+ if (dx < !!mx * 3 || dy < !!my * 3 ||
+ dx + bw4 * h_mul + !!mx * 4 > w ||
+ dy + bh4 * v_mul + !!my * 4 > h)
{
- return -1;
+ f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
+ w, h, dx - !!mx * 3, dy - !!my * 3,
+ t->emu_edge, 192 * sizeof(pixel),
+ refp->p.data[pl], ref_stride);
+ ref = &t->emu_edge[192 * !!my * 3 + !!mx * 3];
+ ref_stride = 192 * sizeof(pixel);
+ } else {
+ ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
}
- w = (f->cur.p.p.w + ss_hor) >> ss_hor;
- h = (f->cur.p.p.h + ss_ver) >> ss_ver;
- } else {
- w = f->bw * 4 >> ss_hor;
- h = f->bh * 4 >> ss_ver;
- }
- if (dx < !!mx * 3 || dy < !!my * 3 ||
- dx + bw4 * h_mul + !!mx * 4 > w ||
- dy + bh4 * v_mul + !!my * 4 > h)
- {
- f->dsp->mc.emu_edge(bw4 * h_mul + !!mx * 7, bh4 * v_mul + !!my * 7,
- w, h, dx - !!mx * 3, dy - !!my * 3,
- t->emu_edge, 192 * sizeof(pixel),
- refp->p.data[pl], ref_stride);
- ref = &t->emu_edge[192 * !!my * 3 + !!mx * 3];
- ref_stride = 192 * sizeof(pixel);
- } else {
- ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * dy + dx;
- }
- if (dst8 != NULL) {
- f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
- bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+ if (dst8 != NULL) {
+ f->dsp->mc.mc[filter_2d](dst8, dst_stride, ref, ref_stride, bw4 * h_mul,
+ bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+ } else {
+ f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
+ bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+ }
} else {
- f->dsp->mc.mct[filter_2d](dst16, ref, ref_stride, bw4 * h_mul,
- bh4 * v_mul, mx << !ss_hor, my << !ss_ver);
+ assert(refp != &f->cur);
+
+ int orig_pos_y = (by * v_mul << 4) + mvy * (1 << !ss_ver);
+ int orig_pos_x = (bx * h_mul << 4) + mvx * (1 << !ss_hor);
+#define scale_mv(res, val, scale) do { \
+ const int64_t tmp = (int64_t)(val) * scale + (scale - 0x4000) * 8; \
+ res = (int)apply_sign64((llabs(tmp) + 128) >> 8, tmp) + 32; \
+ } while (0)
+ int pos_y, pos_x;
+ scale_mv(pos_x, orig_pos_x, f->svc[refidx][0].scale);
+ scale_mv(pos_y, orig_pos_y, f->svc[refidx][1].scale);
+#undef scale_mv
+ const int left = pos_x >> 10;
+ const int top = pos_y >> 10;
+ const int right =
+ ((pos_x + (bw4 * h_mul - 1) * f->svc[refidx][0].step) >> 10) + 1;
+ const int bottom =
+ ((pos_y + (bh4 * v_mul - 1) * f->svc[refidx][1].step) >> 10) + 1;
+
+ if (dav1d_thread_picture_wait(refp, bottom, PLANE_TYPE_Y + !!pl))
+ return -1;
+
+ const int w = (refp->p.p.w + ss_hor) >> ss_hor;
+ const int h = (refp->p.p.h + ss_ver) >> ss_ver;
+ if (left < 3 || top < 3 || right + 4 > w || bottom + 4 > h) {
+ f->dsp->mc.emu_edge(right - left + 7, bottom - top + 7,
+ w, h, left - 3, top - 3,
+ t->emu_edge, 320 * sizeof(pixel),
+ refp->p.data[pl], ref_stride);
+ ref = &t->emu_edge[320 * 3 + 3];
+ ref_stride = 320 * sizeof(pixel);
+ } else {
+ ref = ((pixel *) refp->p.data[pl]) + PXSTRIDE(ref_stride) * top + left;
+ }
+
+ if (dst8 != NULL) {
+ f->dsp->mc.mc_scaled[filter_2d](dst8, dst_stride, ref, ref_stride,
+ bw4 * h_mul, bh4 * v_mul,
+ pos_x & 0x3ff, pos_y & 0x3ff,
+ f->svc[refidx][0].step,
+ f->svc[refidx][1].step);
+ } else {
+ f->dsp->mc.mct_scaled[filter_2d](dst16, ref, ref_stride,
+ bw4 * h_mul, bh4 * v_mul,
+ pos_x & 0x3ff, pos_y & 0x3ff,
+ f->svc[refidx][0].step,
+ f->svc[refidx][1].step);
+ }
}
+
return 0;
}
@@ -576,7 +629,7 @@
const int oh4 = imin(b_dim[1], 16) >> 1;
res = mc(t, lap, NULL, ow4 * h_mul * sizeof(pixel), ow4, oh4,
t->bx + x, t->by, pl, a_r->mv[0],
- &f->refp[a_r->ref[0] - 1],
+ &f->refp[a_r->ref[0] - 1], a_r->ref[0] - 1,
dav1d_filter_2d[t->a->filter[1][bx4 + x + 1]][t->a->filter[0][bx4 + x + 1]]);
if (res) return res;
f->dsp->mc.blend_h(&dst[x * h_mul], dst_stride, lap,
@@ -599,7 +652,7 @@
const int oh4 = iclip(l_b_dim[1], 2, b_dim[1]);
res = mc(t, lap, NULL, h_mul * ow4 * sizeof(pixel), ow4, oh4,
t->bx, t->by + y, pl, l_r->mv[0],
- &f->refp[l_r->ref[0] - 1],
+ &f->refp[l_r->ref[0] - 1], l_r->ref[0] - 1,
dav1d_filter_2d[t->l.filter[1][by4 + y + 1]][t->l.filter[0][by4 + y + 1]]);
if (res) return res;
f->dsp->mc.blend_v(&dst[y * v_mul * PXSTRIDE(dst_stride)],
@@ -1091,13 +1144,13 @@
if (!(f->frame_hdr.frame_type & 1)) {
// intrabc
res = mc(t, dst, NULL, f->cur.p.stride[0],
- bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, FILTER_2D_BILINEAR);
+ bw4, bh4, t->bx, t->by, 0, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
if (res) return res;
if (has_chroma) for (int pl = 1; pl < 3; pl++) {
res = mc(t, ((pixel *)f->cur.p.data[pl]) + uvdstoff, NULL, f->cur.p.stride[1],
bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
t->bx & ~ss_hor, t->by & ~ss_ver,
- pl, b->mv[0], &f->cur, FILTER_2D_BILINEAR);
+ pl, b->mv[0], &f->cur, -1, FILTER_2D_BILINEAR);
if (res) return res;
}
} else if (b->comp_type == COMP_INTER_NONE) {
@@ -1116,7 +1169,7 @@
if (res) return res;
} else {
res = mc(t, dst, NULL, f->cur.p.stride[0],
- bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, filter_2d);
+ bw4, bh4, t->bx, t->by, 0, b->mv[0], refp, b->ref[0], filter_2d);
if (res) return res;
if (b->motion_mode == MM_OBMC) {
res = obmc(t, dst, f->cur.p.stride[0], b_dim, 0, bx4, by4, w4, h4);
@@ -1176,6 +1229,7 @@
bw4, bh4, t->bx - 1, t->by - 1, 1 + pl,
r[-(f->b4_stride + 1)].mv[0],
&f->refp[r[-(f->b4_stride + 1)].ref[0] - 1],
+ r[-(f->b4_stride + 1)].ref[0] - 1,
f->frame_thread.pass != 2 ? t->tl_4x4_filter :
f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx - 1].filter2d);
if (res) return res;
@@ -1190,6 +1244,7 @@
res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + v_off, NULL,
f->cur.p.stride[1], bw4, bh4, t->bx - 1,
t->by, 1 + pl, r[-1].mv[0], &f->refp[r[-1].ref[0] - 1],
+ r[-1].ref[0] - 1,
f->frame_thread.pass != 2 ? left_filter_2d :
f->frame_thread.b[(t->by * f->b4_stride) + t->bx - 1].filter2d);
if (res) return res;
@@ -1204,6 +1259,7 @@
f->cur.p.stride[1], bw4, bh4, t->bx, t->by - 1,
1 + pl, r[-f->b4_stride].mv[0],
&f->refp[r[-f->b4_stride].ref[0] - 1],
+ r[-f->b4_stride].ref[0] - 1,
f->frame_thread.pass != 2 ? top_filter_2d :
f->frame_thread.b[((t->by - 1) * f->b4_stride) + t->bx].filter2d);
if (res) return res;
@@ -1212,7 +1268,8 @@
}
for (int pl = 0; pl < 2; pl++) {
res = mc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff + h_off + v_off, NULL, f->cur.p.stride[1],
- bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0], refp, filter_2d);
+ bw4, bh4, t->bx, t->by, 1 + pl, b->mv[0],
+ refp, b->ref[0], filter_2d);
if (res) return res;
}
} else {
@@ -1235,7 +1292,7 @@
NULL, f->cur.p.stride[1],
bw4 << (bw4 == ss_hor), bh4 << (bh4 == ss_ver),
t->bx & ~ss_hor, t->by & ~ss_ver,
- 1 + pl, b->mv[0], refp, filter_2d);
+ 1 + pl, b->mv[0], refp, b->ref[0], filter_2d);
if (res) return res;
if (b->motion_mode == MM_OBMC) {
res = obmc(t, ((pixel *) f->cur.p.data[1 + pl]) + uvdstoff,
@@ -1307,7 +1364,7 @@
if (res) return res;
} else {
res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by, 0,
- b->mv[i], refp, filter_2d);
+ b->mv[i], refp, b->ref[i], filter_2d);
if (res) return res;
}
}
@@ -1350,7 +1407,7 @@
if (res) return res;
} else {
res = mc(t, NULL, tmp[i], 0, bw4, bh4, t->bx, t->by,
- 1 + pl, b->mv[i], refp, filter_2d);
+ 1 + pl, b->mv[i], refp, b->ref[i], filter_2d);
if (res) return res;
}
}
--- a/src/warpmv.c
+++ b/src/warpmv.c
@@ -67,10 +67,6 @@
return apply_sign((abs(cv) + 32) >> 6, cv) * (1 << 6);
}
-static inline int apply_sign64(const int v, const int64_t s) {
- return s < 0 ? -v : v;
-}
-
static inline int resolve_divisor_32(const unsigned d, int *const shift) {
*shift = ulog2(d);
const int e = d - (1 << *shift);