shithub: dav1d

Download patch

ref: bfdfd1aa1dfe4a067a2887be53cd335e88b52308
parent: 7d1d7d28072632f944a51d59dcb4f90a9816cf2d
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Thu Nov 1 11:48:36 EDT 2018

Simplify SGR C code

- remove the unused dav1d_sgr_one_by_x declaration from tables.h;
- use plain int/unsigned instead of fixed-width types for scalar locals;
- reduce the intermediate buffers (dst, dst1) from int32_t to int16_t.

--- a/src/looprestoration_tmpl.c
+++ b/src/looprestoration_tmpl.c
@@ -408,7 +408,7 @@
     }
 }
 
-static void selfguided_filter(int32_t *dst, const pixel *src,
+static void selfguided_filter(int16_t *dst, const pixel *src,
                               const ptrdiff_t src_stride, const int w,
                               const int h, const int n, const int s)
 {
@@ -441,8 +441,8 @@
             const int b =
                 (BB[i] + (1 << (BITDEPTH - 8) >> 1)) >> (BITDEPTH - 8);
 
-            const uint32_t p = (a * n >= b * b) * (a * n - b * b);
-            const uint32_t z = (p * s + (1 << 19)) >> 20;
+            const unsigned p = a * n - b * b;
+            const unsigned z = (p * s + (1 << 19)) >> 20;
 
             const int x = dav1d_sgr_x_by_xplus1[imin(z, 255)];
             // This is where we invert A and B, so that B is of size coef.
@@ -462,8 +462,8 @@
       P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 5)
         for (; j < h - 1; j+=2) {
             for (int i = 0; i < w; i++) {
-                const int32_t a = SIX_NEIGHBORS(B, i);
-                const int32_t b = SIX_NEIGHBORS(A, i);
+                const int a = SIX_NEIGHBORS(B, i);
+                const int b = SIX_NEIGHBORS(A, i);
                 dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
             }
             dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
@@ -471,8 +471,8 @@
             B += REST_UNIT_STRIDE;
             A += REST_UNIT_STRIDE;
             for (int i = 0; i < w; i++) {
-                const int32_t a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
-                const int32_t b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
+                const int a = B[i] * 6 + (B[i - 1] + B[i + 1]) * 5;
+                const int b = A[i] * 6 + (A[i - 1] + A[i + 1]) * 5;
                 dst[i] = (a * src[i] + b + (1 << 7)) >> 8;
             }
             dst += 384 /* Maximum restoration width is 384 (256 * 1.5) */;
@@ -482,8 +482,8 @@
         }
         if (j + 1 == h) { // Last row, when number of rows is odd
             for (int i = 0; i < w; i++) {
-                const int32_t a = SIX_NEIGHBORS(B, i);
-                const int32_t b = SIX_NEIGHBORS(A, i);
+                const int a = SIX_NEIGHBORS(B, i);
+                const int b = SIX_NEIGHBORS(A, i);
                 dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
             }
         }
@@ -495,8 +495,8 @@
       P[i + 1 - REST_UNIT_STRIDE] + P[i + 1 + REST_UNIT_STRIDE]) * 3)
         for (int j = 0; j < h; j++) {
             for (int i = 0; i < w; i++) {
-                const int32_t a = EIGHT_NEIGHBORS(B, i);
-                const int32_t b = EIGHT_NEIGHBORS(A, i);
+                const int a = EIGHT_NEIGHBORS(B, i);
+                const int b = EIGHT_NEIGHBORS(A, i);
                 dst[i] = (a * src[i] + b + (1 << 8)) >> 9;
             }
             dst += 384;
@@ -522,7 +522,7 @@
 
     // Selfguided filter outputs to a maximum stripe height of 64 and a
     // maximum restoration width of 384 (256 * 1.5)
-    int32_t dst[64 * 384];
+    int16_t dst[64 * 384];
 
     // both r1 and r0 can't be zero
     if (!dav1d_sgr_params[sgr_idx][0]) {
@@ -531,8 +531,8 @@
         const int w1 = (1 << 7) - sgr_w[1];
         for (int j = 0; j < h; j++) {
             for (int i = 0; i < w; i++) {
-                const int32_t u = (p[i] << 4);
-                const int32_t v = (u << 7) + w1 * (dst[j * 384 + i] - u);
+                const int u = (p[i] << 4);
+                const int v = (u << 7) + w1 * (dst[j * 384 + i] - u);
                 p[i] = iclip_pixel((v + (1 << 10)) >> 11);
             }
             p += PXSTRIDE(p_stride);
@@ -543,14 +543,14 @@
         const int w0 = sgr_w[0];
         for (int j = 0; j < h; j++) {
             for (int i = 0; i < w; i++) {
-                const int32_t u = (p[i] << 4);
-                const int32_t v = (u << 7) + w0 * (dst[j * 384 + i] - u);
+                const int u = (p[i] << 4);
+                const int v = (u << 7) + w0 * (dst[j * 384 + i] - u);
                 p[i] = iclip_pixel((v + (1 << 10)) >> 11);
             }
             p += PXSTRIDE(p_stride);
         }
     } else {
-        int32_t dst1[64 * 384];
+        int16_t dst1[64 * 384];
         const int s0 = dav1d_sgr_params[sgr_idx][2];
         const int s1 = dav1d_sgr_params[sgr_idx][3];
         const int w0 = sgr_w[0];
@@ -559,9 +559,9 @@
         selfguided_filter(dst1, tmp, REST_UNIT_STRIDE, w, h, 9, s1);
         for (int j = 0; j < h; j++) {
             for (int i = 0; i < w; i++) {
-                const int32_t u = (p[i] << 4);
-                const int32_t v = (u << 7) + w0 * (dst[j * 384 + i] - u) +
-                                  w1 * (dst1[j * 384 + i] - u);
+                const int u = (p[i] << 4);
+                const int v = (u << 7) + w0 * (dst[j * 384 + i] - u) +
+                              w1 * (dst1[j * 384 + i] - u);
                 p[i] = iclip_pixel((v + (1 << 10)) >> 11);
             }
             p += PXSTRIDE(p_stride);
--- a/src/tables.h
+++ b/src/tables.h
@@ -108,7 +108,6 @@
 
 extern const int16_t dav1d_sgr_params[16][4];
 extern const int16_t dav1d_sgr_x_by_xplus1[256];
-extern const int16_t dav1d_sgr_one_by_x[25];
 
 extern const int8_t dav1d_mc_subpel_filters[5][15][8];
 extern const int8_t dav1d_mc_warp_filter[][8];