ref: bfdfd1aa1dfe4a067a2887be53cd335e88b52308
parent: 7d1d7d28072632f944a51d59dcb4f90a9816cf2d
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Thu Nov 1 11:48:36 EDT 2018
Simplify SGR C code: remove the unused dav1d_sgr_one_by_x entry from tables.h; use non-sized types (int/unsigned) for scalar values; reduce the element size of the intermediate tables from int32 to int16.
--- a/src/looprestoration_tmpl.c
+++ b/src/looprestoration_tmpl.c
@@ -408,7 +408,7 @@
}
}
-static void selfguided_filter(int32_t *dst, const pixel *src,
+static void selfguided_filter(int16_t *dst, const pixel *src,
const ptrdiff_t src_stride, const int w,
const int h, const int n, const int s)
{
@@ -441,8 +441,8 @@
const int b =
(BB[i] + (1 << (BITDEPTH - 8) >> 1)) >> (BITDEPTH - 8);
- const uint32_t p = (a * n >= b * b) * (a * n - b * b);
- const uint32_t z = (p * s + (1 << 19)) >> 20;
+ const unsigned p = a * n - b * b;
+ const unsigned z = (p * s + (1 << 19)) >> 20;
const int x = dav1d_sgr_x_by_xplus1[imin(z, 255)];
// This is where we invert A and B, so that B is of size coef.
@@ -462,8 +462,8 @@
P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 5)
for (; j < h - 1; j+=2) {
for (int i = 0; i < w; i++) {
- const int32_t a = SIX_NEIGHBORS(B, i);
- const int32_t b = SIX_NEIGHBORS(A, i);
+ const int a = SIX_NEIGHBORS(B, i);
+ const int b = SIX_NEIGHBORS(A, i);
dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
}
dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
@@ -471,8 +471,8 @@
B += REST_UNIT_STRIDE;
A += REST_UNIT_STRIDE;
for (int i = 0; i < w; i++) {
- const int32_t a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
- const int32_t b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
+ const int a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
+ const int b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
dst[i] = (a * src[i] + b + (1 << 7)) >> 8;
}
dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
@@ -482,8 +482,8 @@
}
if (j + 1 == h) { // Last row, when number of rows is odd
for (int i = 0; i < w; i++) {
- const int32_t a = SIX_NEIGHBORS(B, i);
- const int32_t b = SIX_NEIGHBORS(A, i);
+ const int a = SIX_NEIGHBORS(B, i);
+ const int b = SIX_NEIGHBORS(A, i);
dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
}
}
@@ -495,8 +495,8 @@
P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 3)
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
- const int32_t a = EIGHT_NEIGHBORS(B, i);
- const int32_t b = EIGHT_NEIGHBORS(A, i);
+ const int a = EIGHT_NEIGHBORS(B, i);
+ const int b = EIGHT_NEIGHBORS(A, i);
dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
}
dst += 384;
@@ -522,7 +522,7 @@
// Selfguided filter outputs to a maximum stripe height of 64 and a
// maximum restoration width of 384 (256 * 1.5)
- int32_t dst[64 * 384];
+ int16_t dst[64 * 384];
// both r1 and r0 can't be zero
if (!dav1d_sgr_params[sgr_idx][0]) {
@@ -531,8 +531,8 @@
const int w1 = (1 << 7) - sgr_w[1];
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
- const int32_t u = (p[i] << 4);
- const int32_t v = (u << 7) + w1 * (dst[j * 384 + i] - u);
+ const int u = (p[i] << 4);
+ const int v = (u << 7) + w1 * (dst[j * 384 + i] - u);
p[i] = iclip_pixel((v + (1 << 10)) >> 11);
}
p += PXSTRIDE(p_stride);
@@ -543,14 +543,14 @@
const int w0 = sgr_w[0];
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
- const int32_t u = (p[i] << 4);
- const int32_t v = (u << 7) + w0 * (dst[j * 384 + i] - u);
+ const int u = (p[i] << 4);
+ const int v = (u << 7) + w0 * (dst[j * 384 + i] - u);
p[i] = iclip_pixel((v + (1 << 10)) >> 11);
}
p += PXSTRIDE(p_stride);
}
} else {
- int32_t dst1[64 * 384];
+ int16_t dst1[64 * 384];
const int s0 = dav1d_sgr_params[sgr_idx][2];
const int s1 = dav1d_sgr_params[sgr_idx][3];
const int w0 = sgr_w[0];
@@ -559,9 +559,9 @@
selfguided_filter(dst1, tmp, REST_UNIT_STRIDE, w, h, 9, s1);
for (int j = 0; j < h; j++) {
for (int i = 0; i < w; i++) {
- const int32_t u = (p[i] << 4);
- const int32_t v = (u << 7) + w0 * (dst[j * 384 + i] - u) +
- w1 * (dst1[j * 384 + i] - u);
+ const int u = (p[i] << 4);
+ const int v = (u << 7) + w0 * (dst[j * 384 + i] - u) +
+ w1 * (dst1[j * 384 + i] - u);
p[i] = iclip_pixel((v + (1 << 10)) >> 11);
}
p += PXSTRIDE(p_stride);
--- a/src/tables.h
+++ b/src/tables.h
@@ -108,7 +108,6 @@
extern const int16_t dav1d_sgr_params[16][4];
extern const int16_t dav1d_sgr_x_by_xplus1[256];
-extern const int16_t dav1d_sgr_one_by_x[25];
extern const int8_t dav1d_mc_subpel_filters[5][15][8];
extern const int8_t dav1d_mc_warp_filter[][8];