ref: 8d2dd439005f72e2f73fc6155f0c2245cbf3227f
parent: d400361524ce739db30d552a9e54809d812710c6
author: Henrik Gramner <gramner@twoorioles.com>
date: Thu May 9 16:39:08 EDT 2019
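Add __attribute__((cold)) to rarely used functions

Introduce a COLD macro in common/attributes.h (expanding to nothing on
non-GNU compilers) and apply it to init, setup, logging and thread-management
functions. Also drop the stray trailing semicolon from the
ATTR_FORMAT_PRINTF macro definition.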
--- a/include/common/attributes.h
+++ b/include/common/attributes.h
@@ -34,10 +34,12 @@
#ifdef __GNUC__
#define ATTR_ALIAS __attribute__((may_alias))
-#define ATTR_FORMAT_PRINTF(fmt, attr) __attribute__((__format__(__printf__, fmt, attr)));
+#define ATTR_FORMAT_PRINTF(fmt, attr) __attribute__((__format__(__printf__, fmt, attr)))
+#define COLD __attribute__((cold))
#else
#define ATTR_ALIAS
#define ATTR_FORMAT_PRINTF(fmt, attr)
+#define COLD
#endif
#if ARCH_X86_64
--- a/src/arm/cdef_init_tmpl.c
+++ b/src/arm/cdef_init_tmpl.c
@@ -24,7 +24,6 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include "common/attributes.h"
#include "src/cpu.h"
#include "src/cdef.h"
@@ -72,7 +71,7 @@
#endif
-void bitfn(dav1d_cdef_dsp_init_arm)(Dav1dCdefDSPContext *const c) {
+COLD void bitfn(dav1d_cdef_dsp_init_arm)(Dav1dCdefDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
--- a/src/arm/cpu.c
+++ b/src/arm/cpu.c
@@ -27,6 +27,8 @@
#include "config.h"
+#include "common/attributes.h"
+
#include "src/arm/cpu.h"
#if defined(HAVE_GETAUXVAL) && ARCH_ARM
@@ -73,7 +75,7 @@
}
#endif
-unsigned dav1d_get_cpu_flags_arm(void) {
+COLD unsigned dav1d_get_cpu_flags_arm(void) {
unsigned flags = 0;
#if ARCH_AARCH64
flags |= DAV1D_ARM_CPU_FLAG_NEON;
--- a/src/arm/loopfilter_init_tmpl.c
+++ b/src/arm/loopfilter_init_tmpl.c
@@ -33,7 +33,7 @@
decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_neon);
decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_neon);
-void bitfn(dav1d_loop_filter_dsp_init_arm)(Dav1dLoopFilterDSPContext *const c) {
+COLD void bitfn(dav1d_loop_filter_dsp_init_arm)(Dav1dLoopFilterDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
--- a/src/arm/looprestoration_init_tmpl.c
+++ b/src/arm/looprestoration_init_tmpl.c
@@ -27,8 +27,6 @@
#include "src/cpu.h"
#include "src/looprestoration.h"
-
-#include "common/attributes.h"
#include "src/tables.h"
#if BITDEPTH == 8
@@ -258,7 +256,7 @@
#endif // ARCH_AARCH64
#endif // BITDEPTH == 8
-void bitfn(dav1d_loop_restoration_dsp_init_arm)(Dav1dLoopRestorationDSPContext *const c) {
+COLD void bitfn(dav1d_loop_restoration_dsp_init_arm)(Dav1dLoopRestorationDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_ARM_CPU_FLAG_NEON)) return;
--- a/src/cdef_tmpl.c
+++ b/src/cdef_tmpl.c
@@ -254,7 +254,7 @@
return best_dir;
}
-void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
+COLD void bitfn(dav1d_cdef_dsp_init)(Dav1dCdefDSPContext *const c) {
c->dir = cdef_find_dir_c;
c->fb[0] = cdef_filter_block_8x8_c;
c->fb[1] = cdef_filter_block_4x8_c;
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -32,7 +32,7 @@
static unsigned flags_mask = -1;
-unsigned dav1d_get_cpu_flags(void) {
+COLD unsigned dav1d_get_cpu_flags(void) {
static unsigned flags;
static uint8_t checked = 0;
@@ -49,6 +49,6 @@
return flags & flags_mask;
}
-void dav1d_set_cpu_flags_mask(const unsigned mask) {
+COLD void dav1d_set_cpu_flags_mask(const unsigned mask) {
flags_mask = mask;
}
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -30,6 +30,8 @@
#include "config.h"
+#include "common/attributes.h"
+
#include "dav1d/common.h"
#if ARCH_AARCH64 || ARCH_ARM
--- a/src/ipred_tmpl.c
+++ b/src/ipred_tmpl.c
@@ -725,7 +725,7 @@
}
}
-void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
+COLD void bitfn(dav1d_intra_pred_dsp_init)(Dav1dIntraPredDSPContext *const c) {
c->intra_pred[DC_PRED ] = ipred_dc_c;
c->intra_pred[DC_128_PRED ] = ipred_dc_128_c;
c->intra_pred[TOP_DC_PRED ] = ipred_dc_top_c;
--- a/src/itx_tmpl.c
+++ b/src/itx_tmpl.c
@@ -193,7 +193,7 @@
memset(coeff, 0, sizeof(*coeff) * 4 * 4);
}
-void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c) {
+COLD void bitfn(dav1d_itx_dsp_init)(Dav1dInvTxfmDSPContext *const c) {
#define assign_itx_all_fn64(w, h, pfx) \
c->itxfm_add[pfx##TX_##w##X##h][DCT_DCT ] = \
inv_txfm_add_dct_dct_##w##x##h##_c
--- a/src/lib.c
+++ b/src/lib.c
@@ -46,17 +46,17 @@
#include "src/wedge.h"
#include "src/film_grain.h"
-static void init_internal(void) {
+static COLD void init_internal(void) {
dav1d_init_wedge_masks();
dav1d_init_interintra_masks();
dav1d_init_qm_tables();
}
-const char *dav1d_version(void) {
+COLD const char *dav1d_version(void) {
return DAV1D_VERSION;
}
-void dav1d_default_settings(Dav1dSettings *const s) {
+COLD void dav1d_default_settings(Dav1dSettings *const s) {
s->n_frame_threads = 1;
s->n_tile_threads = 1;
s->apply_grain = 1;
@@ -71,9 +71,7 @@
static void close_internal(Dav1dContext **const c_out, int flush);
-int dav1d_open(Dav1dContext **const c_out,
- const Dav1dSettings *const s)
-{
+COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) {
static pthread_once_t initted = PTHREAD_ONCE_INIT;
pthread_once(&initted, init_internal);
@@ -432,12 +430,12 @@
c->frame_thread.next = 0;
}
-void dav1d_close(Dav1dContext **const c_out) {
+COLD void dav1d_close(Dav1dContext **const c_out) {
validate_input(c_out != NULL);
close_internal(c_out, 1);
}
-static void close_internal(Dav1dContext **const c_out, int flush) {
+static COLD void close_internal(Dav1dContext **const c_out, int flush) {
Dav1dContext *const c = *c_out;
if (!c) return;
--- a/src/log.c
+++ b/src/log.c
@@ -36,14 +36,14 @@
#include "src/internal.h"
#include "src/log.h"
-void dav1d_log_default_callback(void *const cookie,
- const char *const format, va_list ap)
+COLD void dav1d_log_default_callback(void *const cookie,
+ const char *const format, va_list ap)
{
vfprintf(stderr, format, ap);
}
#if CONFIG_LOG
-void dav1d_log(Dav1dContext *const c, const char *const format, ...) {
+COLD void dav1d_log(Dav1dContext *const c, const char *const format, ...) {
validate_input(c != NULL);
if (!c->logger.callback)
--- a/src/loopfilter_tmpl.c
+++ b/src/loopfilter_tmpl.c
@@ -244,7 +244,7 @@
}
}
-void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
+COLD void bitfn(dav1d_loop_filter_dsp_init)(Dav1dLoopFilterDSPContext *const c) {
c->loop_filter_sb[0][0] = loop_filter_h_sb128y_c;
c->loop_filter_sb[0][1] = loop_filter_v_sb128y_c;
c->loop_filter_sb[1][0] = loop_filter_h_sb128uv_c;
--- a/src/looprestoration_tmpl.c
+++ b/src/looprestoration_tmpl.c
@@ -573,7 +573,7 @@
}
}
-void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *const c) {
+COLD void bitfn(dav1d_loop_restoration_dsp_init)(Dav1dLoopRestorationDSPContext *const c) {
c->wiener = wiener_c;
c->selfguided = selfguided_c;
--- a/src/mc_tmpl.c
+++ b/src/mc_tmpl.c
@@ -912,7 +912,7 @@
} while (--h);
}
-void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
+COLD void bitfn(dav1d_mc_dsp_init)(Dav1dMCDSPContext *const c) {
#define init_mc_fns(type, name) do { \
c->mc [type] = put_##name##_c; \
c->mc_scaled [type] = put_##name##_scaled_c; \
--- a/src/qm.c
+++ b/src/qm.c
@@ -29,6 +29,8 @@
#include <string.h>
+#include "common/attributes.h"
+
#include "src/qm.h"
static const uint8_t qm_tbl_4x4_t[][2][10] = {
@@ -3104,7 +3106,7 @@
}
}
-void dav1d_init_qm_tables(void) {
+COLD void dav1d_init_qm_tables(void) {
// This function is guaranteed to be called only once
for (int i = 0; i < 15; i++)
--- a/src/wedge.c
+++ b/src/wedge.c
@@ -155,7 +155,7 @@
}
}
-static void init_chroma(uint8_t *chroma, const uint8_t *luma,
+static COLD void init_chroma(uint8_t *chroma, const uint8_t *luma,
const int sign, const int w, const int h, const int ss_ver)
{
for (int y = 0; y < h; y += 1 + ss_ver) {
@@ -169,12 +169,12 @@
}
}
-static void fill2d_16x2(uint8_t *dst, const int w, const int h,
- const enum BlockSize bs,
- const uint8_t (*const master)[64 * 64],
- const wedge_code_type *const cb,
- uint8_t *masks_444, uint8_t *masks_422,
- uint8_t *masks_420, const unsigned signs)
+static COLD void fill2d_16x2(uint8_t *dst, const int w, const int h,
+ const enum BlockSize bs,
+ const uint8_t (*const master)[64 * 64],
+ const wedge_code_type *const cb,
+ uint8_t *masks_444, uint8_t *masks_422,
+ uint8_t *masks_420, const unsigned signs)
{
uint8_t *ptr = dst;
for (int n = 0; n < 16; n++) {
@@ -222,7 +222,7 @@
}
}
-void dav1d_init_wedge_masks(void) {
+COLD void dav1d_init_wedge_masks(void) {
// This function is guaranteed to be called only once
enum WedgeMasterLineType {
@@ -304,10 +304,10 @@
#undef set
#undef set1
-static void build_nondc_ii_masks(uint8_t *const mask_v,
- uint8_t *const mask_h,
- uint8_t *const mask_sm,
- const int w, const int h, const int step)
+static COLD void build_nondc_ii_masks(uint8_t *const mask_v,
+ uint8_t *const mask_h,
+ uint8_t *const mask_sm,
+ const int w, const int h, const int step)
{
static const uint8_t ii_weights_1d[] = {
60, 52, 45, 39, 34, 30, 26, 22, 19, 17, 15, 13, 11, 10, 8, 7,
@@ -323,7 +323,7 @@
}
}
-void dav1d_init_interintra_masks(void) {
+COLD void dav1d_init_interintra_masks(void) {
// This function is guaranteed to be called only once
memset(ii_dc_mask, 32, 32 * 32);
--- a/src/win32/thread.c
+++ b/src/win32/thread.c
@@ -33,17 +33,19 @@
#include <stdlib.h>
#include <windows.h>
+#include "common/attributes.h"
+
#include "src/thread.h"
-static unsigned __stdcall thread_entrypoint(void *const data) {
+static COLD unsigned __stdcall thread_entrypoint(void *const data) {
pthread_t *const t = data;
t->arg = t->func(t->arg);
return 0;
}
-int dav1d_pthread_create(pthread_t *const thread,
- const pthread_attr_t *const attr,
- void *(*const func)(void*), void *const arg)
+COLD int dav1d_pthread_create(pthread_t *const thread,
+ const pthread_attr_t *const attr,
+ void *(*const func)(void*), void *const arg)
{
const unsigned stack_size = attr ? attr->stack_size : 0;
thread->func = func;
@@ -53,7 +55,7 @@
return !thread->h;
}
-int dav1d_pthread_join(pthread_t *const thread, void **const res) {
+COLD int dav1d_pthread_join(pthread_t *const thread, void **const res) {
if (WaitForSingleObject(thread->h, INFINITE))
return 1;
@@ -63,8 +65,8 @@
return !CloseHandle(thread->h);
}
-int dav1d_pthread_once(pthread_once_t *const once_control,
- void (*const init_routine)(void))
+COLD int dav1d_pthread_once(pthread_once_t *const once_control,
+ void (*const init_routine)(void))
{
BOOL pending = FALSE;
--- a/src/x86/cdef_init_tmpl.c
+++ b/src/x86/cdef_init_tmpl.c
@@ -44,7 +44,7 @@
decl_cdef_dir_fn(dav1d_cdef_dir_sse4);
decl_cdef_dir_fn(dav1d_cdef_dir_ssse3);
-void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
+COLD void bitfn(dav1d_cdef_dsp_init_x86)(Dav1dCdefDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
--- a/src/x86/cpu.c
+++ b/src/x86/cpu.c
@@ -29,12 +29,14 @@
#include <stdint.h>
+#include "common/attributes.h"
+
#include "src/x86/cpu.h"
void dav1d_cpu_cpuid(uint32_t *info, int leaf);
uint64_t dav1d_cpu_xgetbv(int xcr);
-unsigned dav1d_get_cpu_flags_x86(void) {
+COLD unsigned dav1d_get_cpu_flags_x86(void) {
uint32_t info[4] = {0}, n_ids;
unsigned flags = 0;
--- a/src/x86/ipred_init_tmpl.c
+++ b/src/x86/ipred_init_tmpl.c
@@ -75,7 +75,7 @@
decl_pal_pred_fn(dav1d_pal_pred_ssse3);
-void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
+COLD void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
--- a/src/x86/itx_init_tmpl.c
+++ b/src/x86/itx_init_tmpl.c
@@ -98,7 +98,7 @@
decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_ssse3);
decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_ssse3);
-void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
+COLD void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) {
#define assign_itx_fn(pfx, w, h, type, type_enum, ext) \
c->itxfm_add[pfx##TX_##w##X##h][type_enum] = \
dav1d_inv_txfm_add_##type##_##w##x##h##_##ext
--- a/src/x86/loopfilter_init_tmpl.c
+++ b/src/x86/loopfilter_init_tmpl.c
@@ -33,7 +33,7 @@
decl_loopfilter_sb_fn(dav1d_lpf_h_sb_uv_avx2);
decl_loopfilter_sb_fn(dav1d_lpf_v_sb_uv_avx2);
-void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) {
+COLD void bitfn(dav1d_loop_filter_dsp_init_x86)(Dav1dLoopFilterDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return;
--- a/src/x86/looprestoration_init_tmpl.c
+++ b/src/x86/looprestoration_init_tmpl.c
@@ -28,7 +28,6 @@
#include "src/cpu.h"
#include "src/looprestoration.h"
-#include "common/attributes.h"
#include "common/intops.h"
#include "src/tables.h"
@@ -211,7 +210,7 @@
# endif
#endif
-void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) {
+COLD void bitfn(dav1d_loop_restoration_dsp_init_x86)(Dav1dLoopRestorationDSPContext *const c) {
const unsigned flags = dav1d_get_cpu_flags();
if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return;
--- a/src/x86/mc_init_tmpl.c
+++ b/src/x86/mc_init_tmpl.c
@@ -93,7 +93,7 @@
decl_emu_edge_fn(dav1d_emu_edge_avx2);
decl_emu_edge_fn(dav1d_emu_edge_ssse3);
-void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
+COLD void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) {
#define init_mc_fn(type, name, suffix) \
c->mc[type] = dav1d_put_##name##_##suffix
#define init_mct_fn(type, name, suffix) \