ref: 2f251bd11528a930934b77e2ee0056d5075a35e6
parent: 4b0683a615a353757ad75c4eb4ee67e12a0aa8ce
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Fri Nov 9 09:13:05 EST 2018
Add a max_width/height argument to angular_ipred_fn. This is used in z2 to limit the number of pixels over which the filter is applied, as per "numPx" in 7.11.2.4 point 4 in the AV1 specification. This only applies to z2, because in z1/3, the edge filter is (incomprehensibly) lengthened by the opposite side's edge length, which undoes the limit on the filter length (like a bug undoing another bug). I admit the code is getting rather complex, so we may want to redesign this to make writing SIMD easier.
--- a/src/ipred.h
+++ b/src/ipred.h
@@ -41,7 +41,7 @@
*/
#define decl_angular_ipred_fn(name) \
void (name)(pixel *dst, ptrdiff_t stride, const pixel *topleft, \
- int width, int height, int angle)
+ int width, int height, int angle, int max_width, int max_height)
typedef decl_angular_ipred_fn(*angular_ipred_fn);
/*
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -83,8 +83,7 @@
}
}
-static unsigned dc_gen_top(const pixel *const topleft, const int width)
-{
+static unsigned dc_gen_top(const pixel *const topleft, const int width) {
unsigned dc = width >> 1;
for (int i = 0; i < width; i++)
dc += topleft[1 + i];
@@ -93,7 +92,8 @@
static void ipred_dc_top_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, dc_gen_top(topleft, width));
}
@@ -106,8 +106,7 @@
cfl_pred(dst, stride, width, height, dc_gen_top(topleft, width), ac, alpha);
}
-static unsigned dc_gen_left(const pixel *const topleft, const int height)
-{
+static unsigned dc_gen_left(const pixel *const topleft, const int height) {
unsigned dc = height >> 1;
for (int i = 0; i < height; i++)
dc += topleft[-(1 + i)];
@@ -116,7 +115,8 @@
static void ipred_dc_left_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, dc_gen_left(topleft, height));
}
@@ -140,8 +140,8 @@
#define BASE_SHIFT 17
#endif
-static unsigned
-dc_gen(const pixel *const topleft, const int width, const int height)
+static unsigned dc_gen(const pixel *const topleft,
+ const int width, const int height)
{
unsigned dc = (width + height) >> 1;
for (int i = 0; i < width; i++)
@@ -160,7 +160,8 @@
static void ipred_dc_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, dc_gen(topleft, width, height));
}
@@ -180,7 +181,8 @@
static void ipred_dc_128_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
splat_dc(dst, stride, width, height, 1 << (BITDEPTH - 1));
}
@@ -195,7 +197,8 @@
static void ipred_v_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
for (int y = 0; y < height; y++) {
pixel_copy(dst, topleft + 1, width);
@@ -205,7 +208,8 @@
static void ipred_h_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
for (int y = 0; y < height; y++) {
pixel_set(dst, topleft[-(1 + y)], width);
@@ -215,7 +219,8 @@
static void ipred_paeth_c(pixel *dst, const ptrdiff_t stride,
const pixel *const tl_ptr,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
const int topleft = tl_ptr[0];
for (int y = 0; y < height; y++) {
@@ -236,7 +241,8 @@
static void ipred_smooth_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
const uint8_t *const weights_hor = &dav1d_sm_weights[width];
const uint8_t *const weights_ver = &dav1d_sm_weights[height];
@@ -256,7 +262,8 @@
static void ipred_smooth_v_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
const uint8_t *const weights_ver = &dav1d_sm_weights[height];
const int bottom = topleft[-height];
@@ -273,7 +280,8 @@
static void ipred_smooth_h_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft,
- const int width, const int height, const int a)
+ const int width, const int height, const int a,
+ const int max_width, const int max_height)
{
const uint8_t *const weights_hor = &dav1d_sm_weights[width];
const int right = topleft[width];
@@ -328,7 +336,9 @@
return strength;
}
-static void filter_edge(pixel *const out, const int sz, const pixel *const in,
+static void filter_edge(pixel *const out, const int sz,
+ const int lim_from, const int lim_to,
+ const pixel *const in,
const int from, const int to, const unsigned strength)
{
static const uint8_t kernel[3][5] = {
@@ -338,12 +348,17 @@
};
assert(strength > 0);
- for (int i = 0; i < sz; i++) {
+ int i = 0;
+ for (; i < lim_from; i++)
+ out[i] = in[iclip(i, from, to - 1)];
+ for (; i < imin(lim_to, sz); i++) {
int s = 0;
for (int j = 0; j < 5; j++)
s += in[iclip(i - 2 + j, from, to - 1)] * kernel[strength - 1][j];
out[i] = (s + 8) >> 4;
}
+ for (; i < sz; i++)
+ out[i] = in[iclip(i, from, to - 1)];
}
static int get_upsample(const int blk_wh, const unsigned d, const int type) {
@@ -369,7 +384,8 @@
static void ipred_z1_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
- const int width, const int height, int angle)
+ const int width, const int height, int angle,
+ const int max_width, const int max_height)
{
const int is_sm = angle >> 9;
angle &= 511;
@@ -389,7 +405,7 @@
get_filter_strength(width + height, 90 - angle, is_sm);
if (filter_strength) {
- filter_edge(top_out, width + height,
+ filter_edge(top_out, width + height, 0, width + height,
&topleft_in[1], -1, width + imin(width, height),
filter_strength);
top = top_out;
@@ -421,7 +437,8 @@
static void ipred_z2_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
- const int width, const int height, int angle)
+ const int width, const int height, int angle,
+ const int max_width, const int max_height)
{
const int is_sm = angle >> 9;
angle &= 511;
@@ -440,7 +457,8 @@
get_filter_strength(width + height, angle - 90, is_sm);
if (filter_strength) {
- filter_edge(&topleft[1], width, &topleft_in[1], -1, width,
+ filter_edge(&topleft[1], width, 0, max_width,
+ &topleft_in[1], -1, width,
filter_strength);
} else {
pixel_copy(&topleft[1], &topleft_in[1], width);
@@ -453,7 +471,8 @@
get_filter_strength(width + height, 180 - angle, is_sm);
if (filter_strength) {
- filter_edge(&topleft[-height], height, &topleft_in[-height],
+ filter_edge(&topleft[-height], height, height - max_height, height,
+ &topleft_in[-height],
0, height + 1, filter_strength);
} else {
pixel_copy(&topleft[-height], &topleft_in[-height], height);
@@ -492,7 +511,8 @@
static void ipred_z3_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
- const int width, const int height, int angle)
+ const int width, const int height, int angle,
+ const int max_width, const int max_height)
{
const int is_sm = angle >> 9;
angle &= 511;
@@ -513,7 +533,7 @@
get_filter_strength(width + height, angle - 180, is_sm);
if (filter_strength) {
- filter_edge(left_out, width + height,
+ filter_edge(left_out, width + height, 0, width + height,
&topleft_in[-(width + height)],
imax(width - height, 0), width + height + 1,
filter_strength);
@@ -548,7 +568,8 @@
/* Up to 32x32 only */
static void ipred_filter_c(pixel *dst, const ptrdiff_t stride,
const pixel *const topleft_in,
- const int width, const int height, int filt_idx)
+ const int width, const int height, int filt_idx,
+ const int max_width, const int max_height)
{
filt_idx &= 511;
assert(filt_idx < 5);
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -766,7 +766,9 @@
t_dim->w, t_dim->h, edge);
dsp->ipred.intra_pred[m](dst, f->cur.p.stride[0], edge,
t_dim->w * 4, t_dim->h * 4,
- angle | sm_fl);
+ angle | sm_fl,
+ f->cur.p.p.w - 4 * t->bx,
+ f->cur.p.p.h - 4 * t->by);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(edge - t_dim->h * 4, t_dim->h * 4,
@@ -981,7 +983,11 @@
dsp->ipred.intra_pred[m](dst, stride, edge,
uv_t_dim->w * 4,
uv_t_dim->h * 4,
- angle | sm_uv_fl);
+ angle | sm_uv_fl,
+ (f->cur.p.p.w + ss_hor -
+ 4 * (t->bx & ~ss_hor)) >> ss_hor,
+ (f->cur.p.p.h + ss_ver -
+ 4 * (t->by & ~ss_ver)) >> ss_ver);
if (DEBUG_BLOCK_INFO && DEBUG_B_PIXELS) {
hex_dump(edge - uv_t_dim->h * 4, uv_t_dim->h * 4,
uv_t_dim->h * 4, 2, "l");
@@ -1136,7 +1142,7 @@
0, dst, f->cur.p.stride[0], top_sb_edge,
m, &angle, bw4, bh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, 4 * bw4 * sizeof(pixel),
- tl_edge, bw4 * 4, bh4 * 4, 0);
+ tl_edge, bw4 * 4, bh4 * 4, 0, 0, 0);
const uint8_t *const ii_mask =
b->interintra_type == INTER_INTRA_BLEND ?
dav1d_ii_masks[bs][0][b->interintra_mode] :
@@ -1273,7 +1279,7 @@
top_sb_edge, m,
&angle, cbw4, cbh4, tl_edge);
dsp->ipred.intra_pred[m](tmp, cbw4 * 4 * sizeof(pixel),
- tl_edge, cbw4 * 4, cbh4 * 4, 0);
+ tl_edge, cbw4 * 4, cbh4 * 4, 0, 0, 0);
dsp->mc.blend(uvdst, f->cur.p.stride[1], tmp,
cbw4 * 4, cbh4 * 4, ii_mask);
}
--- a/tests/checkasm/ipred.c
+++ b/tests/checkasm/ipred.c
@@ -68,7 +68,7 @@
pixel *const topleft = topleft_buf + 128;
declare_func(void, pixel *dst, ptrdiff_t stride, const pixel *topleft,
- int width, int height, int angle);
+ int width, int height, int angle, int max_width, int max_height);
for (int mode = 0; mode < N_IMPL_INTRA_PRED_MODES; mode++)
for (int w = 4; w <= (mode == FILTER_PRED ? 32 : 64); w <<= 1)
@@ -89,12 +89,13 @@
for (int i = -h * 2; i <= w * 2; i++)
topleft[i] = rand() & ((1 << BITDEPTH) - 1);
- call_ref(c_dst, stride, topleft, w, h, a);
- call_new(a_dst, stride, topleft, w, h, a);
+ const int maxw = 1 + (rand() % 128), maxh = 1 + (rand() % 128);
+ call_ref(c_dst, stride, topleft, w, h, a, maxw, maxh);
+ call_new(a_dst, stride, topleft, w, h, a, maxw, maxh);
if (memcmp(c_dst, a_dst, w * h * sizeof(*c_dst)))
fail();
- bench_new(a_dst, stride, topleft, w, h, a);
+ bench_new(a_dst, stride, topleft, w, h, a, 128, 128);
}
}
report("intra_pred");