shithub: dav1d

ref: e214351b0e485f68d0c7a03f726e8a694f0a8cbf
parent: 46e2a2d0cc451e1d6bb929f80088f8a7b8940dd0
author: Janne Grunau <janne-vlc@jannau.net>
date: Thu Oct 25 16:03:51 EDT 2018

rename arch-specific bitdepth template files

Missed in 46e2a2d0cc. The Arm asm will be hard to template, so move those
files to the plain source list.

Fix #96.
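
For context: files on meson's libdav1d_tmpl_sources list are compiled once per
bit depth with BITDEPTH predefined, and bitfn() mangles each init function's
name accordingly, which is why the renamed *_tmpl.c files can stay templated.
A minimal sketch of that convention, simplified from dav1d's common/bitdepth.h
(the real header defines considerably more than this):

    #include <stdint.h>

    /* Each template source is compiled twice:
     * once with -DBITDEPTH=8, once with -DBITDEPTH=16. */
    #if BITDEPTH == 8
    typedef uint8_t pixel;
    #define bitfn(name) name##_8bpc   /* dav1d_mc_dsp_init_arm -> ..._8bpc  */
    #elif BITDEPTH == 16
    typedef uint16_t pixel;
    #define bitfn(name) name##_16bpc  /* dav1d_mc_dsp_init_arm -> ..._16bpc */
    #endif

Hand-written asm exports fixed symbol names (e.g. dav1d_avg_8bpc_neon), so the
.S files cannot be templated this way; moving them to the plain source list and
dropping their #if BITDEPTH == 8 guards leaves the bit-depth check to the C
init files.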

--- a/src/arm/32/mc.S
+++ b/src/arm/32/mc.S
@@ -28,8 +28,6 @@
 
 #include "src/arm/asm.S"
 
-#if BITDEPTH == 8
-
 .macro avg dst0, dst1, t0, t1, t2, t3
         vld1.16         {\t0,\t1},   [r2, :128]!
         vld1.16         {\t2,\t3},   [r3, :128]!
@@ -215,5 +213,3 @@
 bidir_fn avg
 bidir_fn w_avg
 bidir_fn mask
-
-#endif /* BITDEPTH == 8 */
--- a/src/arm/64/mc.S
+++ b/src/arm/64/mc.S
@@ -27,8 +27,6 @@
 
 #include "src/arm/asm.S"
 
-#if BITDEPTH == 8
-
 .macro avg dst, t0, t1
         ld1             {\t0\().8h},   [x2],  16
         ld1             {\t1\().8h},   [x3],  16
@@ -233,5 +231,3 @@
 bidir_fn avg
 bidir_fn w_avg
 bidir_fn mask
-
-#endif /* BITDEPTH == 8 */
--- a/src/arm/mc_init.c
+++ /dev/null
@@ -1,47 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-
-#include "src/mc.h"
-#include "src/cpu.h"
-
-decl_avg_fn(dav1d_avg_8bpc_neon);
-decl_w_avg_fn(dav1d_w_avg_8bpc_neon);
-decl_mask_fn(dav1d_mask_8bpc_neon);
-
-void bitfn(dav1d_mc_dsp_init_arm)(Dav1dMCDSPContext *const c) {
-    const unsigned flags = dav1d_get_cpu_flags();
-
-    if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
-
-#if BITDEPTH == 8
-    c->avg = dav1d_avg_8bpc_neon;
-    c->w_avg = dav1d_w_avg_8bpc_neon;
-    c->mask = dav1d_mask_8bpc_neon;
-#endif
-}
--- /dev/null
+++ b/src/arm/mc_init_tmpl.c
@@ -1,0 +1,47 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include "src/mc.h"
+#include "src/cpu.h"
+
+decl_avg_fn(dav1d_avg_8bpc_neon);
+decl_w_avg_fn(dav1d_w_avg_8bpc_neon);
+decl_mask_fn(dav1d_mask_8bpc_neon);
+
+void bitfn(dav1d_mc_dsp_init_arm)(Dav1dMCDSPContext *const c) {
+    const unsigned flags = dav1d_get_cpu_flags();
+
+    if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
+
+#if BITDEPTH == 8
+    c->avg = dav1d_avg_8bpc_neon;
+    c->w_avg = dav1d_w_avg_8bpc_neon;
+    c->mask = dav1d_mask_8bpc_neon;
+#endif
+}
--- a/src/meson.build
+++ b/src/meson.build
@@ -83,14 +83,14 @@
             'arm/cpu.c',
         )
         libdav1d_tmpl_sources += files(
-            'arm/mc_init.c',
+            'arm/mc_init_tmpl.c',
         )
         if host_machine.cpu_family() == 'aarch64'
-            libdav1d_tmpl_sources += files(
+            libdav1d_sources += files(
                 'arm/64/mc.S',
             )
         elif host_machine.cpu_family().startswith('arm')
-            libdav1d_tmpl_sources += files(
+            libdav1d_sources += files(
                 'arm/32/mc.S',
             )
         endif
@@ -101,11 +101,11 @@
         )
 
         libdav1d_tmpl_sources += files(
-            'x86/ipred_init.c',
-            'x86/itx_init.c',
-            'x86/loopfilter_init.c',
-            'x86/looprestoration_init.c',
-            'x86/mc_init.c',
+            'x86/ipred_init_tmpl.c',
+            'x86/itx_init_tmpl.c',
+            'x86/loopfilter_init_tmpl.c',
+            'x86/looprestoration_init_tmpl.c',
+            'x86/mc_init_tmpl.c',
         )
 
         # NASM source files
--- a/src/x86/ipred_init.c
+++ /dev/null
@@ -1,75 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/ipred.h"
-
-decl_angular_ipred_fn(dav1d_ipred_dc_avx2);
-decl_angular_ipred_fn(dav1d_ipred_dc_128_avx2);
-decl_angular_ipred_fn(dav1d_ipred_dc_top_avx2);
-decl_angular_ipred_fn(dav1d_ipred_dc_left_avx2);
-decl_angular_ipred_fn(dav1d_ipred_h_avx2);
-decl_angular_ipred_fn(dav1d_ipred_v_avx2);
-decl_angular_ipred_fn(dav1d_ipred_paeth_avx2);
-decl_angular_ipred_fn(dav1d_ipred_smooth_avx2);
-decl_angular_ipred_fn(dav1d_ipred_smooth_v_avx2);
-decl_angular_ipred_fn(dav1d_ipred_smooth_h_avx2);
-decl_angular_ipred_fn(dav1d_ipred_filter_avx2);
-
-decl_cfl_pred_fn(dav1d_ipred_cfl_avx2);
-decl_cfl_pred_fn(dav1d_ipred_cfl_128_avx2);
-decl_cfl_pred_fn(dav1d_ipred_cfl_top_avx2);
-decl_cfl_pred_fn(dav1d_ipred_cfl_left_avx2);
-
-decl_pal_pred_fn(dav1d_pal_pred_avx2);
-
-void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
-    const unsigned flags = dav1d_get_cpu_flags();
-
-    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
-    c->intra_pred[DC_PRED]       = dav1d_ipred_dc_avx2;
-    c->intra_pred[DC_128_PRED]   = dav1d_ipred_dc_128_avx2;
-    c->intra_pred[TOP_DC_PRED]   = dav1d_ipred_dc_top_avx2;
-    c->intra_pred[LEFT_DC_PRED]  = dav1d_ipred_dc_left_avx2;
-    c->intra_pred[HOR_PRED]      = dav1d_ipred_h_avx2;
-    c->intra_pred[VERT_PRED]     = dav1d_ipred_v_avx2;
-    c->intra_pred[PAETH_PRED]    = dav1d_ipred_paeth_avx2;
-    c->intra_pred[SMOOTH_PRED]   = dav1d_ipred_smooth_avx2;
-    c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_avx2;
-    c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_avx2;
-    c->intra_pred[FILTER_PRED]   = dav1d_ipred_filter_avx2;
-
-    c->cfl_pred[DC_PRED]      = dav1d_ipred_cfl_avx2;
-    c->cfl_pred[DC_128_PRED]  = dav1d_ipred_cfl_128_avx2;
-    c->cfl_pred[TOP_DC_PRED]  = dav1d_ipred_cfl_top_avx2;
-    c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_avx2;
-
-    c->pal_pred = dav1d_pal_pred_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/ipred_init_tmpl.c
@@ -1,0 +1,75 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/ipred.h"
+
+decl_angular_ipred_fn(dav1d_ipred_dc_avx2);
+decl_angular_ipred_fn(dav1d_ipred_dc_128_avx2);
+decl_angular_ipred_fn(dav1d_ipred_dc_top_avx2);
+decl_angular_ipred_fn(dav1d_ipred_dc_left_avx2);
+decl_angular_ipred_fn(dav1d_ipred_h_avx2);
+decl_angular_ipred_fn(dav1d_ipred_v_avx2);
+decl_angular_ipred_fn(dav1d_ipred_paeth_avx2);
+decl_angular_ipred_fn(dav1d_ipred_smooth_avx2);
+decl_angular_ipred_fn(dav1d_ipred_smooth_v_avx2);
+decl_angular_ipred_fn(dav1d_ipred_smooth_h_avx2);
+decl_angular_ipred_fn(dav1d_ipred_filter_avx2);
+
+decl_cfl_pred_fn(dav1d_ipred_cfl_avx2);
+decl_cfl_pred_fn(dav1d_ipred_cfl_128_avx2);
+decl_cfl_pred_fn(dav1d_ipred_cfl_top_avx2);
+decl_cfl_pred_fn(dav1d_ipred_cfl_left_avx2);
+
+decl_pal_pred_fn(dav1d_pal_pred_avx2);
+
+void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
+    const unsigned flags = dav1d_get_cpu_flags();
+
+    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
+
+#if BITDEPTH == 8 && ARCH_X86_64
+    c->intra_pred[DC_PRED]       = dav1d_ipred_dc_avx2;
+    c->intra_pred[DC_128_PRED]   = dav1d_ipred_dc_128_avx2;
+    c->intra_pred[TOP_DC_PRED]   = dav1d_ipred_dc_top_avx2;
+    c->intra_pred[LEFT_DC_PRED]  = dav1d_ipred_dc_left_avx2;
+    c->intra_pred[HOR_PRED]      = dav1d_ipred_h_avx2;
+    c->intra_pred[VERT_PRED]     = dav1d_ipred_v_avx2;
+    c->intra_pred[PAETH_PRED]    = dav1d_ipred_paeth_avx2;
+    c->intra_pred[SMOOTH_PRED]   = dav1d_ipred_smooth_avx2;
+    c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_avx2;
+    c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_avx2;
+    c->intra_pred[FILTER_PRED]   = dav1d_ipred_filter_avx2;
+
+    c->cfl_pred[DC_PRED]      = dav1d_ipred_cfl_avx2;
+    c->cfl_pred[DC_128_PRED]  = dav1d_ipred_cfl_128_avx2;
+    c->cfl_pred[TOP_DC_PRED]  = dav1d_ipred_cfl_top_avx2;
+    c->cfl_pred[LEFT_DC_PRED] = dav1d_ipred_cfl_left_avx2;
+
+    c->pal_pred = dav1d_pal_pred_avx2;
+#endif
+}
--- a/src/x86/itx_init.c
+++ /dev/null
@@ -1,141 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/itx.h"
-
-#define decl_itx2_fns(w, h, opt) \
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_identity_##w##x##h##_##opt)
-
-#define decl_itx12_fns(w, h, opt) \
-decl_itx2_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_dct_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_dct_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_dct_##w##x##h##_##opt)
-
-#define decl_itx16_fns(w, h, opt) \
-decl_itx12_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_adst_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_flipadst_identity_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_adst_##w##x##h##_##opt); \
-decl_itx_fn(dav1d_inv_txfm_add_identity_flipadst_##w##x##h##_##opt)
-
-#define decl_itx17_fns(w, h, opt) \
-decl_itx16_fns(w, h, opt); \
-decl_itx_fn(dav1d_inv_txfm_add_wht_wht_##w##x##h##_##opt)
-
-decl_itx17_fns( 4,  4, avx2);
-decl_itx16_fns( 4,  8, avx2);
-decl_itx16_fns( 4, 16, avx2);
-decl_itx16_fns( 8,  4, avx2);
-decl_itx16_fns( 8,  8, avx2);
-decl_itx16_fns( 8, 16, avx2);
-decl_itx2_fns ( 8, 32, avx2);
-decl_itx16_fns(16,  4, avx2);
-decl_itx16_fns(16,  8, avx2);
-decl_itx12_fns(16, 16, avx2);
-decl_itx2_fns (16, 32, avx2);
-decl_itx2_fns (32,  8, avx2);
-decl_itx2_fns (32, 16, avx2);
-decl_itx2_fns (32, 32, avx2);
-
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_16x64_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_32x64_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x16_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_avx2);
-decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_avx2);
-
-void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
-#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
-    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
-        dav1d_inv_txfm_add_##type##_##w##x##h##_##ext
-
-#define assign_itx1_fn(pfx, w, h, ext) \
-    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
-
-#define assign_itx2_fn(pfx, w, h, ext) \
-    assign_itx1_fn(pfx, w, h, ext); \
-    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
-
-#define assign_itx12_fn(pfx, w, h, ext) \
-    assign_itx2_fn(pfx, w, h, ext); \
-    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
-    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
-    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
-    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
-    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
-    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
-    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
-    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
-    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
-    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
-
-#define assign_itx16_fn(pfx, w, h, ext) \
-    assign_itx12_fn(pfx, w, h, ext); \
-    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
-    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
-    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
-    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
-
-#define assign_itx17_fn(pfx, w, h, ext) \
-    assign_itx16_fn(pfx, w, h, ext); \
-    assign_itx_fn(pfx, w, h, wht_wht,           WHT_WHT,           ext)
-
-    const unsigned flags = dav1d_get_cpu_flags();
-
-    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
-    assign_itx17_fn( ,  4,  4, avx2);
-    assign_itx16_fn(R,  4,  8, avx2);
-    assign_itx16_fn(R,  4, 16, avx2);
-    assign_itx16_fn(R,  8,  4, avx2);
-    assign_itx16_fn( ,  8,  8, avx2);
-    assign_itx16_fn(R,  8, 16, avx2);
-    assign_itx2_fn (R,  8, 32, avx2);
-    assign_itx16_fn(R, 16,  4, avx2);
-    assign_itx16_fn(R, 16,  8, avx2);
-    assign_itx12_fn( , 16, 16, avx2);
-    assign_itx2_fn (R, 16, 32, avx2);
-    assign_itx1_fn (R, 16, 64, avx2);
-    assign_itx2_fn (R, 32,  8, avx2);
-    assign_itx2_fn (R, 32, 16, avx2);
-    assign_itx2_fn ( , 32, 32, avx2);
-    assign_itx1_fn (R, 32, 64, avx2);
-    assign_itx1_fn (R, 64, 16, avx2);
-    assign_itx1_fn (R, 64, 32, avx2);
-    assign_itx1_fn ( , 64, 64, avx2);
-#endif
-}
--- /dev/null
+++ b/src/x86/itx_init_tmpl.c
@@ -1,0 +1,141 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/itx.h"
+
+#define decl_itx2_fns(w, h, opt) \
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_identity_##w##x##h##_##opt)
+
+#define decl_itx12_fns(w, h, opt) \
+decl_itx2_fns(w, h, opt); \
+decl_itx_fn(dav1d_inv_txfm_add_dct_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_dct_flipadst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_dct_identity_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_dct_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_flipadst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_dct_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_flipadst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_dct_##w##x##h##_##opt)
+
+#define decl_itx16_fns(w, h, opt) \
+decl_itx12_fns(w, h, opt); \
+decl_itx_fn(dav1d_inv_txfm_add_adst_identity_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_flipadst_identity_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_adst_##w##x##h##_##opt); \
+decl_itx_fn(dav1d_inv_txfm_add_identity_flipadst_##w##x##h##_##opt)
+
+#define decl_itx17_fns(w, h, opt) \
+decl_itx16_fns(w, h, opt); \
+decl_itx_fn(dav1d_inv_txfm_add_wht_wht_##w##x##h##_##opt)
+
+decl_itx17_fns( 4,  4, avx2);
+decl_itx16_fns( 4,  8, avx2);
+decl_itx16_fns( 4, 16, avx2);
+decl_itx16_fns( 8,  4, avx2);
+decl_itx16_fns( 8,  8, avx2);
+decl_itx16_fns( 8, 16, avx2);
+decl_itx2_fns ( 8, 32, avx2);
+decl_itx16_fns(16,  4, avx2);
+decl_itx16_fns(16,  8, avx2);
+decl_itx12_fns(16, 16, avx2);
+decl_itx2_fns (16, 32, avx2);
+decl_itx2_fns (32,  8, avx2);
+decl_itx2_fns (32, 16, avx2);
+decl_itx2_fns (32, 32, avx2);
+
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_16x64_avx2);
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_32x64_avx2);
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x16_avx2);
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_avx2);
+decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_avx2);
+
+void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
+#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
+    c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
+        dav1d_inv_txfm_add_##type##_##w##x##h##_##ext
+
+#define assign_itx1_fn(pfx, w, h, ext) \
+    assign_itx_fn(pfx, w, h, dct_dct,           DCT_DCT,           ext)
+
+#define assign_itx2_fn(pfx, w, h, ext) \
+    assign_itx1_fn(pfx, w, h, ext); \
+    assign_itx_fn(pfx, w, h, identity_identity, IDTX,              ext)
+
+#define assign_itx12_fn(pfx, w, h, ext) \
+    assign_itx2_fn(pfx, w, h, ext); \
+    assign_itx_fn(pfx, w, h, dct_adst,          ADST_DCT,          ext); \
+    assign_itx_fn(pfx, w, h, dct_flipadst,      FLIPADST_DCT,      ext); \
+    assign_itx_fn(pfx, w, h, dct_identity,      H_DCT,             ext); \
+    assign_itx_fn(pfx, w, h, adst_dct,          DCT_ADST,          ext); \
+    assign_itx_fn(pfx, w, h, adst_adst,         ADST_ADST,         ext); \
+    assign_itx_fn(pfx, w, h, adst_flipadst,     FLIPADST_ADST,     ext); \
+    assign_itx_fn(pfx, w, h, flipadst_dct,      DCT_FLIPADST,      ext); \
+    assign_itx_fn(pfx, w, h, flipadst_adst,     ADST_FLIPADST,     ext); \
+    assign_itx_fn(pfx, w, h, flipadst_flipadst, FLIPADST_FLIPADST, ext); \
+    assign_itx_fn(pfx, w, h, identity_dct,      V_DCT,             ext)
+
+#define assign_itx16_fn(pfx, w, h, ext) \
+    assign_itx12_fn(pfx, w, h, ext); \
+    assign_itx_fn(pfx, w, h, adst_identity,     H_ADST,            ext); \
+    assign_itx_fn(pfx, w, h, flipadst_identity, H_FLIPADST,        ext); \
+    assign_itx_fn(pfx, w, h, identity_adst,     V_ADST,            ext); \
+    assign_itx_fn(pfx, w, h, identity_flipadst, V_FLIPADST,        ext)
+
+#define assign_itx17_fn(pfx, w, h, ext) \
+    assign_itx16_fn(pfx, w, h, ext); \
+    assign_itx_fn(pfx, w, h, wht_wht,           WHT_WHT,           ext)
+
+    const unsigned flags = dav1d_get_cpu_flags();
+
+    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
+
+#if BITDEPTH == 8 && ARCH_X86_64
+    assign_itx17_fn( ,  4,  4, avx2);
+    assign_itx16_fn(R,  4,  8, avx2);
+    assign_itx16_fn(R,  4, 16, avx2);
+    assign_itx16_fn(R,  8,  4, avx2);
+    assign_itx16_fn( ,  8,  8, avx2);
+    assign_itx16_fn(R,  8, 16, avx2);
+    assign_itx2_fn (R,  8, 32, avx2);
+    assign_itx16_fn(R, 16,  4, avx2);
+    assign_itx16_fn(R, 16,  8, avx2);
+    assign_itx12_fn( , 16, 16, avx2);
+    assign_itx2_fn (R, 16, 32, avx2);
+    assign_itx1_fn (R, 16, 64, avx2);
+    assign_itx2_fn (R, 32,  8, avx2);
+    assign_itx2_fn (R, 32, 16, avx2);
+    assign_itx2_fn ( , 32, 32, avx2);
+    assign_itx1_fn (R, 32, 64, avx2);
+    assign_itx1_fn (R, 64, 16, avx2);
+    assign_itx1_fn (R, 64, 32, avx2);
+    assign_itx1_fn ( , 64, 64, avx2);
+#endif
+}
--- a/src/x86/loopfilter_init.c
+++ /dev/null
@@ -1,47 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/loopfilter.h"
-
-decl_loopfilter_sb_fn(dav1d_lpf_h_sb_y_avx2);
-decl_loopfilter_sb_fn(dav1d_lpf_v_sb_y_avx2);
-decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_avx2);
-decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_avx2);
-
-void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) {
-    const unsigned flags = dav1d_get_cpu_flags();
-
-    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
-    c->loop_filter_sb[0][0] = dav1d_lpf_h_sb_y_avx2;
-    c->loop_filter_sb[0][1] = dav1d_lpf_v_sb_y_avx2;
-    c->loop_filter_sb[1][0] = dav1d_lpf_h_sb_uv_avx2;
-    c->loop_filter_sb[1][1] = dav1d_lpf_v_sb_uv_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/loopfilter_init_tmpl.c
@@ -1,0 +1,47 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/loopfilter.h"
+
+decl_loopfilter_sb_fn(dav1d_lpf_h_sb_y_avx2);
+decl_loopfilter_sb_fn(dav1d_lpf_v_sb_y_avx2);
+decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_avx2);
+decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_avx2);
+
+void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) {
+    const unsigned flags = dav1d_get_cpu_flags();
+
+    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
+
+#if BITDEPTH == 8 && ARCH_X86_64
+    c->loop_filter_sb[0][0] = dav1d_lpf_h_sb_y_avx2;
+    c->loop_filter_sb[0][1] = dav1d_lpf_v_sb_y_avx2;
+    c->loop_filter_sb[1][0] = dav1d_lpf_h_sb_uv_avx2;
+    c->loop_filter_sb[1][1] = dav1d_lpf_v_sb_uv_avx2;
+#endif
+}
--- a/src/x86/looprestoration_init.c
+++ /dev/null
@@ -1,86 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/looprestoration.h"
-
-#include "common/attributes.h"
-#include "common/intops.h"
-
-#if BITDEPTH == 8 && ARCH_X86_64
-void dav1d_wiener_filter_h_avx2(int16_t *dst, const pixel (*left)[4],
-                                const pixel *src, ptrdiff_t stride,
-                                const int16_t fh[7], const intptr_t w,
-                                int h, enum LrEdgeFlags edges);
-void dav1d_wiener_filter_v_avx2(pixel *dst, ptrdiff_t stride,
-                                const int16_t *mid, int w, int h,
-                                const int16_t fv[7], enum LrEdgeFlags edges);
-
-// Future potential optimizations:
-// - special chroma versions which don't filter [0]/[6];
-// - running filter_h_avx2 transposed (one col of 32 pixels per iteration, top
-//   to bottom) instead of scanline-ordered should be faster since then the
-//   if (have_left) and similar conditions run only once instead of per line;
-// - filter_v_avx2 currently runs 16 pixels per iteration, it should be possible
-//   to run 32 (like filter_h_avx2), and then all vpermqs can go;
-// - maybe split out the top/bottom filter_h_avx2 from the main body filter_h_avx2,
-//   since then the have_left condition can be inlined;
-// - consider having the wrapper (wiener_filter_avx2) also in hand-written
-//   assembly, so the setup overhead is minimized.
-
-static void wiener_filter_avx2(pixel *const dst, const ptrdiff_t dst_stride,
-                               const pixel (*const left)[4],
-                               const pixel *lpf, const ptrdiff_t lpf_stride,
-                               const int w, const int h, const int16_t fh[7],
-                               const int16_t fv[7], const enum LrEdgeFlags edges)
-{
-    ALIGN_STK_32(int16_t, mid, 68 * 384,);
-
-    // horizontal filter
-    dav1d_wiener_filter_h_avx2(&mid[2 * 384], left, dst, dst_stride,
-                               fh, w, h, edges);
-    if (edges & LR_HAVE_TOP)
-        dav1d_wiener_filter_h_avx2(mid, NULL, lpf, lpf_stride,
-                                   fh, w, 2, edges);
-    if (edges & LR_HAVE_BOTTOM)
-        dav1d_wiener_filter_h_avx2(&mid[(2 + h) * 384], NULL,
-                                   lpf + 6 * PXSTRIDE(lpf_stride), lpf_stride,
-                                   fh, w, 2, edges);
-
-    dav1d_wiener_filter_v_avx2(dst, dst_stride, &mid[2*384], w, h, fv, edges);
-}
-#endif
-
-void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) {
-    const unsigned flags = dav1d_get_cpu_flags();
-
-    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
-    c->wiener = wiener_filter_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/looprestoration_init_tmpl.c
@@ -1,0 +1,86 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/looprestoration.h"
+
+#include "common/attributes.h"
+#include "common/intops.h"
+
+#if BITDEPTH == 8 && ARCH_X86_64
+void dav1d_wiener_filter_h_avx2(int16_t *dst, const pixel (*left)[4],
+                                const pixel *src, ptrdiff_t stride,
+                                const int16_t fh[7], const intptr_t w,
+                                int h, enum LrEdgeFlags edges);
+void dav1d_wiener_filter_v_avx2(pixel *dst, ptrdiff_t stride,
+                                const int16_t *mid, int w, int h,
+                                const int16_t fv[7], enum LrEdgeFlags edges);
+
+// Future potential optimizations:
+// - special chroma versions which don't filter [0]/[6];
+// - running filter_h_avx2 transposed (one col of 32 pixels per iteration, top
+//   to bottom) instead of scanline-ordered should be faster since then the
+//   if (have_left) and similar conditions run only once instead of per line;
+// - filter_v_avx2 currently runs 16 pixels per iteration, it should be possible
+//   to run 32 (like filter_h_avx2), and then all vpermqs can go;
+// - maybe split out the top/bottom filter_h_avx2 from the main body filter_h_avx2,
+//   since then the have_left condition can be inlined;
+// - consider having the wrapper (wiener_filter_avx2) also in hand-written
+//   assembly, so the setup overhead is minimized.
+
+static void wiener_filter_avx2(pixel *const dst, const ptrdiff_t dst_stride,
+                               const pixel (*const left)[4],
+                               const pixel *lpf, const ptrdiff_t lpf_stride,
+                               const int w, const int h, const int16_t fh[7],
+                               const int16_t fv[7], const enum LrEdgeFlags edges)
+{
+    ALIGN_STK_32(int16_t, mid, 68 * 384,);
+
+    // horizontal filter
+    dav1d_wiener_filter_h_avx2(&mid[2 * 384], left, dst, dst_stride,
+                               fh, w, h, edges);
+    if (edges & LR_HAVE_TOP)
+        dav1d_wiener_filter_h_avx2(mid, NULL, lpf, lpf_stride,
+                                   fh, w, 2, edges);
+    if (edges & LR_HAVE_BOTTOM)
+        dav1d_wiener_filter_h_avx2(&mid[(2 + h) * 384], NULL,
+                                   lpf + 6 * PXSTRIDE(lpf_stride), lpf_stride,
+                                   fh, w, 2, edges);
+
+    dav1d_wiener_filter_v_avx2(dst, dst_stride, &mid[2*384], w, h, fv, edges);
+}
+#endif
+
+void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) {
+    const unsigned flags = dav1d_get_cpu_flags();
+
+    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
+
+#if BITDEPTH == 8 && ARCH_X86_64
+    c->wiener = wiener_filter_avx2;
+#endif
+}
--- a/src/x86/mc_init.c
+++ /dev/null
@@ -1,95 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "src/cpu.h"
-#include "src/mc.h"
-
-decl_mc_fn(dav1d_put_8tap_regular_avx2);
-decl_mc_fn(dav1d_put_8tap_regular_smooth_avx2);
-decl_mc_fn(dav1d_put_8tap_regular_sharp_avx2);
-decl_mc_fn(dav1d_put_8tap_smooth_avx2);
-decl_mc_fn(dav1d_put_8tap_smooth_regular_avx2);
-decl_mc_fn(dav1d_put_8tap_smooth_sharp_avx2);
-decl_mc_fn(dav1d_put_8tap_sharp_avx2);
-decl_mc_fn(dav1d_put_8tap_sharp_regular_avx2);
-decl_mc_fn(dav1d_put_8tap_sharp_smooth_avx2);
-decl_mc_fn(dav1d_put_bilin_avx2);
-
-decl_mct_fn(dav1d_prep_8tap_regular_avx2);
-decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx2);
-decl_mct_fn(dav1d_prep_8tap_regular_sharp_avx2);
-decl_mct_fn(dav1d_prep_8tap_smooth_avx2);
-decl_mct_fn(dav1d_prep_8tap_smooth_regular_avx2);
-decl_mct_fn(dav1d_prep_8tap_smooth_sharp_avx2);
-decl_mct_fn(dav1d_prep_8tap_sharp_avx2);
-decl_mct_fn(dav1d_prep_8tap_sharp_regular_avx2);
-decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx2);
-decl_mct_fn(dav1d_prep_bilin_avx2);
-
-decl_avg_fn(dav1d_avg_avx2);
-decl_w_avg_fn(dav1d_w_avg_avx2);
-decl_mask_fn(dav1d_mask_avx2);
-decl_w_mask_fn(dav1d_w_mask_420_avx2);
-
-void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
-#define init_mc_fn(type, name, suffix) \
-    c->mc[type] = dav1d_put_##name##_##suffix
-#define init_mct_fn(type, name, suffix) \
-    c->mct[type] = dav1d_prep_##name##_##suffix
-    const unsigned flags = dav1d_get_cpu_flags();
-
-    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
-
-#if BITDEPTH == 8 && ARCH_X86_64
-    init_mc_fn (FILTER_2D_8TAP_REGULAR,        8tap_regular,        avx2);
-    init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
-    init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  avx2);
-    init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
-    init_mc_fn (FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         avx2);
-    init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   avx2);
-    init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  avx2);
-    init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   avx2);
-    init_mc_fn (FILTER_2D_8TAP_SHARP,          8tap_sharp,          avx2);
-    init_mc_fn (FILTER_2D_BILINEAR,            bilin,               avx2);
-
-    init_mct_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        avx2);
-    init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
-    init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  avx2);
-    init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
-    init_mct_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         avx2);
-    init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   avx2);
-    init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  avx2);
-    init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   avx2);
-    init_mct_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          avx2);
-    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               avx2);
-
-    c->avg = dav1d_avg_avx2;
-    c->w_avg = dav1d_w_avg_avx2;
-    c->mask = dav1d_mask_avx2;
-    c->w_mask[2] = dav1d_w_mask_420_avx2;
-#endif
-}
--- /dev/null
+++ b/src/x86/mc_init_tmpl.c
@@ -1,0 +1,95 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "src/cpu.h"
+#include "src/mc.h"
+
+decl_mc_fn(dav1d_put_8tap_regular_avx2);
+decl_mc_fn(dav1d_put_8tap_regular_smooth_avx2);
+decl_mc_fn(dav1d_put_8tap_regular_sharp_avx2);
+decl_mc_fn(dav1d_put_8tap_smooth_avx2);
+decl_mc_fn(dav1d_put_8tap_smooth_regular_avx2);
+decl_mc_fn(dav1d_put_8tap_smooth_sharp_avx2);
+decl_mc_fn(dav1d_put_8tap_sharp_avx2);
+decl_mc_fn(dav1d_put_8tap_sharp_regular_avx2);
+decl_mc_fn(dav1d_put_8tap_sharp_smooth_avx2);
+decl_mc_fn(dav1d_put_bilin_avx2);
+
+decl_mct_fn(dav1d_prep_8tap_regular_avx2);
+decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx2);
+decl_mct_fn(dav1d_prep_8tap_regular_sharp_avx2);
+decl_mct_fn(dav1d_prep_8tap_smooth_avx2);
+decl_mct_fn(dav1d_prep_8tap_smooth_regular_avx2);
+decl_mct_fn(dav1d_prep_8tap_smooth_sharp_avx2);
+decl_mct_fn(dav1d_prep_8tap_sharp_avx2);
+decl_mct_fn(dav1d_prep_8tap_sharp_regular_avx2);
+decl_mct_fn(dav1d_prep_8tap_sharp_smooth_avx2);
+decl_mct_fn(dav1d_prep_bilin_avx2);
+
+decl_avg_fn(dav1d_avg_avx2);
+decl_w_avg_fn(dav1d_w_avg_avx2);
+decl_mask_fn(dav1d_mask_avx2);
+decl_w_mask_fn(dav1d_w_mask_420_avx2);
+
+void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
+#define init_mc_fn(type, name, suffix) \
+    c->mc[type] = dav1d_put_##name##_##suffix
+#define init_mct_fn(type, name, suffix) \
+    c->mct[type] = dav1d_prep_##name##_##suffix
+    const unsigned flags = dav1d_get_cpu_flags();
+
+    if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
+
+#if BITDEPTH == 8 && ARCH_X86_64
+    init_mc_fn (FILTER_2D_8TAP_REGULAR,        8tap_regular,        avx2);
+    init_mc_fn (FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
+    init_mc_fn (FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  avx2);
+    init_mc_fn (FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
+    init_mc_fn (FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         avx2);
+    init_mc_fn (FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   avx2);
+    init_mc_fn (FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  avx2);
+    init_mc_fn (FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   avx2);
+    init_mc_fn (FILTER_2D_8TAP_SHARP,          8tap_sharp,          avx2);
+    init_mc_fn (FILTER_2D_BILINEAR,            bilin,               avx2);
+
+    init_mct_fn(FILTER_2D_8TAP_REGULAR,        8tap_regular,        avx2);
+    init_mct_fn(FILTER_2D_8TAP_REGULAR_SMOOTH, 8tap_regular_smooth, avx2);
+    init_mct_fn(FILTER_2D_8TAP_REGULAR_SHARP,  8tap_regular_sharp,  avx2);
+    init_mct_fn(FILTER_2D_8TAP_SMOOTH_REGULAR, 8tap_smooth_regular, avx2);
+    init_mct_fn(FILTER_2D_8TAP_SMOOTH,         8tap_smooth,         avx2);
+    init_mct_fn(FILTER_2D_8TAP_SMOOTH_SHARP,   8tap_smooth_sharp,   avx2);
+    init_mct_fn(FILTER_2D_8TAP_SHARP_REGULAR,  8tap_sharp_regular,  avx2);
+    init_mct_fn(FILTER_2D_8TAP_SHARP_SMOOTH,   8tap_sharp_smooth,   avx2);
+    init_mct_fn(FILTER_2D_8TAP_SHARP,          8tap_sharp,          avx2);
+    init_mct_fn(FILTER_2D_BILINEAR,            bilin,               avx2);
+
+    c->avg = dav1d_avg_avx2;
+    c->w_avg = dav1d_w_avg_avx2;
+    c->mask = dav1d_mask_avx2;
+    c->w_mask[2] = dav1d_w_mask_420_avx2;
+#endif
+}