ref: a9a09b7022ef85fce334f4a7b79e3dde818a68f9
parent: 7775e15a416f7df2792547db1e39df568b17c844
author: Ronald S. Bultje <rsbultje@gmail.com>
date: Thu Nov 8 08:18:19 EST 2018
cdef: simplify sec_taps. Also reduce scope of tables to inside the function where they are used.
--- a/src/cdef_tmpl.c
+++ b/src/cdef_tmpl.c
@@ -34,19 +34,6 @@
#include "src/cdef.h"
-static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
- { -1 * 12 + 1, -2 * 12 + 2 },
- { 0 * 12 + 1, -1 * 12 + 2 },
- { 0 * 12 + 1, 0 * 12 + 2 },
- { 0 * 12 + 1, 1 * 12 + 2 },
- { 1 * 12 + 1, 2 * 12 + 2 },
- { 1 * 12 + 0, 2 * 12 + 1 },
- { 1 * 12 + 0, 2 * 12 + 0 },
- { 1 * 12 + 0, 2 * 12 - 1 }
-};
-static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
-static const uint8_t cdef_sec_taps[2][2] = { { 2, 1 }, { 2, 1 } };
-
static inline int constrain(const int diff, const int threshold,
const int damping)
{
@@ -112,12 +99,23 @@
const int sec_strength, const int dir,
const int damping, const enum CdefEdgeFlags edges)
{
+ static const int8_t cdef_directions[8 /* dir */][2 /* pass */] = {
+ { -1 * 12 + 1, -2 * 12 + 2 },
+ { 0 * 12 + 1, -1 * 12 + 2 },
+ { 0 * 12 + 1, 0 * 12 + 2 },
+ { 0 * 12 + 1, 1 * 12 + 2 },
+ { 1 * 12 + 1, 2 * 12 + 2 },
+ { 1 * 12 + 0, 2 * 12 + 1 },
+ { 1 * 12 + 0, 2 * 12 + 0 },
+ { 1 * 12 + 0, 2 * 12 - 1 }
+ };
+ static const uint8_t cdef_pri_taps[2][2] = { { 4, 2 }, { 3, 3 } };
+ static const uint8_t sec_taps[2] = { 2, 1 };
const ptrdiff_t tmp_stride = 12;
assert((w == 4 || w == 8) && (h == 4 || h == 8));
uint16_t tmp_buf[144]; // 12*12 is the maximum value of tmp_stride * (h + 4)
uint16_t *tmp = tmp_buf + 2 * tmp_stride + 2;
const uint8_t *const pri_taps = cdef_pri_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
- const uint8_t *const sec_taps = cdef_sec_taps[(pri_strength >> (BITDEPTH - 8)) & 1];
padding(tmp, tmp_stride, dst, dst_stride, left, top, w, h, edges);
--- a/src/x86/cdef.asm
+++ b/src/x86/cdef.asm
@@ -37,7 +37,7 @@
shufw_210xxxxx: db 4, 5, 2, 3, 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
pw_128: times 2 dw 128
pw_2048: times 2 dw 2048
-tap_table: dw 4, 2, 3, 3, 2, 1, 2, 1
+tap_table: dw 4, 2, 3, 3, 2, 1
db -1 * 16 + 1, -2 * 16 + 2
db 0 * 16 + 1, -1 * 16 + 2
db 0 * 16 + 1, 0 * 16 + 2
@@ -371,15 +371,14 @@
vpbroadcastw m0, xm0 ; pri_strength
vpbroadcastw m1, xm1 ; sec_strength
and prid, 1
- and secd, 1
lea tapq, [tap_table]
lea priq, [tapq+priq*4] ; pri_taps
- lea secq, [tapq+secq*4+8] ; sec_taps
+ lea secq, [tapq+8] ; sec_taps
- ; off1/2/3[k] [6 total] from [tapq+16+(dir+0/2/6)*2+k]
+ ; off1/2/3[k] [6 total] from [tapq+12+(dir+0/2/6)*2+k]
DEFINE_ARGS dst, stride, tap, dir, pri, sec, stride3
mov dird, r6m
- lea tapq, [tapq+dirq*2+16]
+ lea tapq, [tapq+dirq*2+12]
%if %1*%2*2/mmsize > 1
DEFINE_ARGS dst, stride, dir, stk, pri, sec, h, off, k
mov hd, %1*%2*2/mmsize