shithub: dav1d

Download patch

ref: a9a09b7022ef85fce334f4a7b79e3dde818a68f9
parent: 7775e15a416f7df2792547db1e39df568b17c844
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Thu Nov 8 08:18:19 EST 2018

cdef: simplify sec_taps

Also reduce scope of tables to inside the function where they are used.

--- a/src/cdef_tmpl.c
+++ b/src/cdef_tmpl.c
@@ -34,19 +34,6 @@
 
 #include "src/cdef.h"
 
-static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
-    { -1 * 12 + 1, -2 * 12 + 2 },
-    {  0 * 12 + 1, -1 * 12 + 2 },
-    {  0 * 12 + 1,  0 * 12 + 2 },
-    {  0 * 12 + 1,  1 * 12 + 2 },
-    {  1 * 12 + 1,  2 * 12 + 2 },
-    {  1 * 12 + 0,  2 * 12 + 1 },
-    {  1 * 12 + 0,  2 * 12 + 0 },
-    {  1 * 12 + 0,  2 * 12 - 1 }
-};
-static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
-static const uint8_t cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
-
 static inline int constrain(const int diff, const int threshold,
                             const int damping)
 {
@@ -112,12 +99,23 @@
                     const int sec_strength, const int dir,
                     const int damping, const enum CdefEdgeFlags edges)
 {
+    static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
+        { -1 * 12 + 1, -2 * 12 + 2 },
+        {  0 * 12 + 1, -1 * 12 + 2 },
+        {  0 * 12 + 1,  0 * 12 + 2 },
+        {  0 * 12 + 1,  1 * 12 + 2 },
+        {  1 * 12 + 1,  2 * 12 + 2 },
+        {  1 * 12 + 0,  2 * 12 + 1 },
+        {  1 * 12 + 0,  2 * 12 + 0 },
+        {  1 * 12 + 0,  2 * 12 - 1 }
+    };
+    static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
+    static const uint8_t sec_taps[2] = { 2, 1 };
     const ptrdiff_t tmp_stride = 12;
     assert((w == 4 || w == 8) && (h == 4 || h == 8));
     uint16_t tmp_buf[144];  // 12*12 is the maximum value of tmp_stride * (h + 4)
     uint16_t *tmp = tmp_buf + 2 * tmp_stride + 2;
     const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
-    const uint8_t *const sec_taps = cdef_sec_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
 
     padding(tmp, tmp_stride, dst, dst_stride, left, top, w, h, edges);
 
--- a/src/x86/cdef.asm
+++ b/src/x86/cdef.asm
@@ -37,7 +37,7 @@
 shufw_210xxxxx: db 4, 5, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
 pw_128: times 2 dw 128
 pw_2048: times 2 dw 2048
-tap_table: dw 4, 2, 3, 3, 2, 1, 2, 1
+tap_table: dw 4, 2, 3, 3, 2, 1
            db -1 * 16 + 1, -2 * 16 + 2
            db  0 * 16 + 1, -1 * 16 + 2
            db  0 * 16 + 1,  0 * 16 + 2
@@ -371,15 +371,14 @@
     vpbroadcastw    m0, xm0                     ; pri_strength
     vpbroadcastw    m1, xm1                     ; sec_strength
     and           prid, 1
-    and           secd, 1
     lea           tapq, [tap_table]
     lea           priq, [tapq+priq*4]           ; pri_taps
-    lea           secq, [tapq+secq*4+8]         ; sec_taps
+    lea           secq, [tapq+8]                ; sec_taps
 
-    ; off1/2/3[k] [6 total] from [tapq+16+(dir+0/2/6)*2+k]
+    ; off1/2/3[k] [6 total] from [tapq+12+(dir+0/2/6)*2+k]
     DEFINE_ARGS dst, stride, tap, dir, pri, sec, stride3
     mov           dird, r6m
-    lea           tapq, [tapq+dirq*2+16]
+    lea           tapq, [tapq+dirq*2+12]
 %if %1*%2*2/mmsize > 1
     DEFINE_ARGS dst, stride, dir, stk, pri, sec, h, off, k
     mov             hd, %1*%2*2/mmsize