shithub: dav1d

Download patch

ref: 236e1122da50c23d95766682590d546f787684ea
parent: dc98fff8757e018a4a74217aade64a10b7afd46d
author: Henrik Gramner <gramner@twoorioles.com>
date: Thu Nov 19 20:37:54 EST 2020

Add more buffer pools

Add buffer pools for miscellaneous smaller buffers that are
repeatedly being freed and reallocated.

Also improve dav1d_ref_create() by consolidating two separate
memory allocations into a single one.

--- a/include/common/mem.h
+++ /dev/null
@@ -1,84 +1,0 @@
-/*
- * Copyright © 2018, VideoLAN and dav1d authors
- * Copyright © 2018, Two Orioles, LLC
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifndef DAV1D_COMMON_MEM_H
-#define DAV1D_COMMON_MEM_H
-
-#include <stdlib.h>
-
-#if defined(HAVE_ALIGNED_MALLOC) || defined(HAVE_MEMALIGN)
-#include <malloc.h>
-#endif
-
-#include "common/attributes.h"
-
-/*
- * Allocate align-byte aligned memory. The return value can be released
- * by calling the dav1d_free_aligned() function.
- */
-static inline void *dav1d_alloc_aligned(size_t sz, size_t align) {
-    assert(!(align & (align - 1)));
-#ifdef HAVE_POSIX_MEMALIGN
-    void *ptr;
-    if (posix_memalign(&ptr, align, sz)) return NULL;
-    return ptr;
-#elif defined(HAVE_ALIGNED_MALLOC)
-    return _aligned_malloc(sz, align);
-#elif defined(HAVE_MEMALIGN)
-    return memalign(align, sz);
-#else
-#error Missing aligned alloc implementation
-#endif
-}
-
-static inline void dav1d_free_aligned(void* ptr) {
-#ifdef HAVE_POSIX_MEMALIGN
-    free(ptr);
-#elif defined(HAVE_ALIGNED_MALLOC)
-    _aligned_free(ptr);
-#elif defined(HAVE_MEMALIGN)
-    free(ptr);
-#endif
-}
-
-static inline void dav1d_freep_aligned(void* ptr) {
-    void **mem = (void **) ptr;
-    if (*mem) {
-        dav1d_free_aligned(*mem);
-        *mem = NULL;
-    }
-}
-
-static inline void freep(void *ptr) {
-    void **mem = (void **) ptr;
-    if (*mem) {
-        free(*mem);
-        *mem = NULL;
-    }
-}
-
-#endif /* DAV1D_COMMON_MEM_H */
--- a/src/cdf.c
+++ b/src/cdf.c
@@ -29,10 +29,7 @@
 
 #include <string.h>
 
-#include "src/thread.h"
-#include "common/intops.h"
-
-#include "src/cdf.h"
+#include "src/internal.h"
 #include "src/tables.h"
 
 #define CDF1(x) (32768-(x))
@@ -4096,11 +4093,11 @@
     }
 }
 
-int dav1d_cdf_thread_alloc(CdfThreadContext *const cdf,
+int dav1d_cdf_thread_alloc(Dav1dContext *const c, CdfThreadContext *const cdf,
                            struct thread_data *const t)
 {
-    cdf->ref = dav1d_ref_create(sizeof(CdfContext) +
-                                (t != NULL) * sizeof(atomic_uint));
+    cdf->ref = dav1d_ref_create_using_pool(&c->cdf_pool,
+                                           sizeof(CdfContext) + sizeof(atomic_uint));
     if (!cdf->ref) return DAV1D_ERR(ENOMEM);
     cdf->data.cdf = cdf->ref->data;
     if (t) {
--- a/src/cdf.h
+++ b/src/cdf.h
@@ -140,7 +140,8 @@
 } CdfThreadContext;
 
 void dav1d_cdf_thread_init_static(CdfThreadContext *cdf, int qidx);
-int dav1d_cdf_thread_alloc(CdfThreadContext *cdf, struct thread_data *t);
+int dav1d_cdf_thread_alloc(Dav1dContext *c, CdfThreadContext *cdf,
+                           struct thread_data *t);
 void dav1d_cdf_thread_copy(CdfContext *dst, const CdfThreadContext *src);
 void dav1d_cdf_thread_ref(CdfThreadContext *dst, CdfThreadContext *src);
 void dav1d_cdf_thread_unref(CdfThreadContext *cdf);
--- a/src/data.c
+++ b/src/data.c
@@ -43,6 +43,7 @@
 uint8_t *dav1d_data_create_internal(Dav1dData *const buf, const size_t sz) {
     validate_input_or_ret(buf != NULL, NULL);
 
+    if (sz > SIZE_MAX / 2) return NULL;
     buf->ref = dav1d_ref_create(sz);
     if (!buf->ref) return NULL;
     buf->data = buf->ref->const_data;
--- a/src/decode.c
+++ b/src/decode.c
@@ -36,7 +36,6 @@
 #include "dav1d/data.h"
 
 #include "common/intops.h"
-#include "common/mem.h"
 
 #include "src/ctx.h"
 #include "src/decode.h"
@@ -3398,7 +3397,7 @@
         dav1d_cdf_thread_ref(&f->in_cdf, &c->cdf[pri_ref]);
     }
     if (f->frame_hdr->refresh_context) {
-        res = dav1d_cdf_thread_alloc(&f->out_cdf, c->n_fc > 1 ? &f->frame_thread.td : NULL);
+        res = dav1d_cdf_thread_alloc(c, &f->out_cdf, c->n_fc > 1 ? &f->frame_thread.td : NULL);
         if (res < 0) goto error;
     }
 
@@ -3463,8 +3462,8 @@
 
     // ref_mvs
     if ((f->frame_hdr->frame_type & 1) || f->frame_hdr->allow_intrabc) {
-        f->mvs_ref = dav1d_ref_create(f->sb128h * 16 * (f->b4_stride >> 1) *
-                                      sizeof(*f->mvs));
+        f->mvs_ref = dav1d_ref_create_using_pool(&c->refmvs_pool,
+            sizeof(*f->mvs) * f->sb128h * 16 * (f->b4_stride >> 1));
         if (!f->mvs_ref) {
             res = DAV1D_ERR(ENOMEM);
             goto error;
@@ -3527,7 +3526,8 @@
             // We're updating an existing map, but need somewhere to
             // put the new values. Allocate them here (the data
             // actually gets set elsewhere)
-            f->cur_segmap_ref = dav1d_ref_create(f->b4_stride * 32 * f->sb128h);
+            f->cur_segmap_ref = dav1d_ref_create_using_pool(&c->segmap_pool,
+                sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h);
             if (!f->cur_segmap_ref) {
                 dav1d_ref_dec(&f->prev_segmap_ref);
                 res = DAV1D_ERR(ENOMEM);
@@ -3542,13 +3542,14 @@
             f->cur_segmap = f->prev_segmap_ref->data;
         } else {
             // We need to make a new map. Allocate one here and zero it out.
-            f->cur_segmap_ref = dav1d_ref_create(f->b4_stride * 32 * f->sb128h);
+            const size_t segmap_size = sizeof(*f->cur_segmap) * f->b4_stride * 32 * f->sb128h;
+            f->cur_segmap_ref = dav1d_ref_create_using_pool(&c->segmap_pool, segmap_size);
             if (!f->cur_segmap_ref) {
                 res = DAV1D_ERR(ENOMEM);
                 goto error;
             }
             f->cur_segmap = f->cur_segmap_ref->data;
-            memset(f->cur_segmap_ref->data, 0, f->b4_stride * 32 * f->sb128h);
+            memset(f->cur_segmap, 0, segmap_size);
         }
     } else {
         f->cur_segmap = NULL;
--- a/src/internal.h
+++ b/src/internal.h
@@ -82,8 +82,10 @@
     int n_tile_data_alloc;
     int n_tile_data;
     int n_tiles;
+    Dav1dMemPool seq_hdr_pool;
     Dav1dRef *seq_hdr_ref;
     Dav1dSequenceHeader *seq_hdr;
+    Dav1dMemPool frame_hdr_pool;
     Dav1dRef *frame_hdr_ref;
     Dav1dFrameHeader *frame_hdr;
 
@@ -107,6 +109,8 @@
     } frame_thread;
 
     // reference/entropy state
+    Dav1dMemPool segmap_pool;
+    Dav1dMemPool refmvs_pool;
     struct {
         Dav1dThreadPicture p;
         Dav1dRef *segmap;
@@ -113,6 +117,7 @@
         Dav1dRef *refmvs;
         unsigned refpoc[7];
     } refs[8];
+    Dav1dMemPool cdf_pool;
     CdfThreadContext cdf[8];
 
     Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */];
@@ -136,10 +141,8 @@
 
     Dav1dLogger logger;
 
-    struct {
-        pthread_mutex_t lock;
-        Dav1dPictureBuffer *buf;
-    } picture_buffer_pool;
+    Dav1dMemPool picture_pool;
+    int mem_pools_inited;
 };
 
 struct Dav1dFrameContext {
--- a/src/lib.c
+++ b/src/lib.c
@@ -38,7 +38,6 @@
 #include "dav1d/dav1d.h"
 #include "dav1d/data.h"
 
-#include "common/mem.h"
 #include "common/validate.h"
 
 #include "src/cpu.h"
@@ -77,6 +76,35 @@
     s->frame_size_limit = 0;
 }
 
+static COLD int init_mem_pools(Dav1dContext *const c) { /* Initialize the mutex of every memory pool in c; returns 0 on success, -1 on failure. */
+    if (!pthread_mutex_init(&c->seq_hdr_pool.lock, NULL)) { /* each nesting level is entered only if the init on that line succeeded */
+        if (!pthread_mutex_init(&c->frame_hdr_pool.lock, NULL)) {
+            if (!pthread_mutex_init(&c->segmap_pool.lock, NULL)) {
+                if (!pthread_mutex_init(&c->refmvs_pool.lock, NULL)) {
+                    if (!pthread_mutex_init(&c->cdf_pool.lock, NULL)) {
+                        if (c->allocator.alloc_picture_callback == dav1d_default_picture_alloc) { /* the picture pool is only set up for the default allocator */
+                            if (!pthread_mutex_init(&c->picture_pool.lock, NULL)) {
+                                c->allocator.cookie = &c->picture_pool; /* dav1d_default_picture_alloc/release use their cookie as the pool */
+                                c->mem_pools_inited = 2; /* 2: all pools, including picture_pool */
+                                return 0;
+                            }
+                        } else {
+                            c->mem_pools_inited = 1; /* 1: all pools except picture_pool */
+                            return 0;
+                        }
+                        pthread_mutex_destroy(&c->cdf_pool.lock); /* reached only if the picture_pool init failed */
+                    }
+                    pthread_mutex_destroy(&c->refmvs_pool.lock); /* unwind: destroy the already-initialized mutexes in reverse order */
+                }
+                pthread_mutex_destroy(&c->segmap_pool.lock);
+            }
+            pthread_mutex_destroy(&c->frame_hdr_pool.lock);
+        }
+        pthread_mutex_destroy(&c->seq_hdr_pool.lock);
+    }
+    return -1; /* c->mem_pools_inited is not modified on the failure path */
+}
+
 static void close_internal(Dav1dContext **const c_out, int flush);
 
 NO_SANITIZE("cfi-icall") // CFI is broken with dlsym()
@@ -129,10 +157,7 @@
     c->all_layers = s->all_layers;
     c->frame_size_limit = s->frame_size_limit;
 
-    if (c->allocator.alloc_picture_callback == dav1d_default_picture_alloc) {
-        if (pthread_mutex_init(&c->picture_buffer_pool.lock, NULL)) goto error;
-        c->allocator.cookie = c;
-    }
+    if (init_mem_pools(c)) goto error;
 
     /* On 32-bit systems extremely large frame sizes can cause overflows in
      * dav1d_decode_frame() malloc size calculations. Prevent that from occuring
@@ -577,12 +602,14 @@
     dav1d_ref_dec(&c->content_light_ref);
     dav1d_ref_dec(&c->itut_t35_ref);
 
-    pthread_mutex_destroy(&c->picture_buffer_pool.lock);
-    Dav1dPictureBuffer *buf = c->picture_buffer_pool.buf;
-    while (buf) {
-        Dav1dPictureBuffer *const next = buf->next;
-        dav1d_free_aligned(buf->data);
-        buf = next;
+    if (c->mem_pools_inited) {
+        dav1d_mem_pool_destroy(&c->seq_hdr_pool);
+        dav1d_mem_pool_destroy(&c->frame_hdr_pool);
+        dav1d_mem_pool_destroy(&c->segmap_pool);
+        dav1d_mem_pool_destroy(&c->refmvs_pool);
+        dav1d_mem_pool_destroy(&c->cdf_pool);
+        if (c->mem_pools_inited == 2)
+            dav1d_mem_pool_destroy(&c->picture_pool);
     }
 
     dav1d_freep_aligned(c_out);
--- /dev/null
+++ b/src/mem.c
@@ -1,0 +1,74 @@
+/*
+ * Copyright © 2020, VideoLAN and dav1d authors
+ * Copyright © 2020, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+
+#include <stdint.h>
+
+#include "src/mem.h"
+#include "src/thread.h"
+
+void dav1d_mem_pool_push(Dav1dMemPool *const pool, Dav1dMemPoolBuffer *const buf) { /* Put buf back into pool so a later dav1d_mem_pool_pop() can reuse it. */
+    pthread_mutex_lock(&pool->lock);
+    buf->next = pool->buf; /* link in front of the current head, so the most recently pushed buffer is popped first */
+    pool->buf = buf;
+    pthread_mutex_unlock(&pool->lock);
+}
+
+Dav1dMemPoolBuffer *dav1d_mem_pool_pop(Dav1dMemPool *const pool, const size_t size) { /* Get a buffer with size payload bytes: reuse the pool head if it has exactly that size, else allocate. Returns NULL if allocation fails. */
+    pthread_mutex_lock(&pool->lock);
+    Dav1dMemPoolBuffer *buf = pool->buf;
+    uint8_t *data;
+    if (buf) {
+        pool->buf = buf->next; /* unlink the head while the lock is still held */
+        pthread_mutex_unlock(&pool->lock);
+        data = buf->data;
+        if ((uintptr_t)buf - (uintptr_t)data != size) { /* the header sits at data + size (see below), so this distance is the size the buffer was allocated with */
+            dav1d_free_aligned(data); /* size mismatch: drop it; the header is part of the same allocation and goes with it */
+            goto alloc;
+        }
+    } else {
+        pthread_mutex_unlock(&pool->lock);
+alloc: /* also entered via the goto above when the pooled buffer had a different size */
+        data = dav1d_alloc_aligned(size + sizeof(Dav1dMemPoolBuffer), 64); /* payload and trailing header in a single 64-byte-aligned allocation */
+        if (!data) return NULL;
+        buf = (Dav1dMemPoolBuffer*)(data + size); /* the header is placed directly after the payload */
+        buf->data = data;
+    }
+
+    return buf; /* buf->next is not set for a fresh allocation; dav1d_mem_pool_push() assigns it */
+}
+
+COLD void dav1d_mem_pool_destroy(Dav1dMemPool *const pool) { /* Destroy the pool mutex and free every buffer currently held in the pool. */
+    pthread_mutex_destroy(&pool->lock);
+    Dav1dMemPoolBuffer *buf = pool->buf; /* NOTE(review): the list is walked without the lock; presumably no other thread uses the pool at this point -- confirm */
+    while (buf) {
+        void *const data = buf->data;
+        buf = buf->next; /* fetch next before freeing: the header lives inside the data allocation */
+        dav1d_free_aligned(data);
+    }
+}
--- /dev/null
+++ b/src/mem.h
@@ -1,0 +1,100 @@
+/*
+ * Copyright © 2018, VideoLAN and dav1d authors
+ * Copyright © 2018, Two Orioles, LLC
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef DAV1D_SRC_MEM_H
+#define DAV1D_SRC_MEM_H
+
+#include <stdlib.h>
+
+#if defined(HAVE_ALIGNED_MALLOC) || defined(HAVE_MEMALIGN)
+#include <malloc.h>
+#endif
+
+#include "common/attributes.h"
+
+#include "src/thread.h"
+
+typedef struct Dav1dMemPoolBuffer { /* Header of one pooled buffer; dav1d_mem_pool_pop() stores it directly after the payload. */
+    void *data; /* start of the allocation, i.e. the payload base */
+    struct Dav1dMemPoolBuffer *next; /* next buffer in the pool's list; assigned by dav1d_mem_pool_push() */
+} Dav1dMemPoolBuffer;
+
+typedef struct Dav1dMemPool { /* A mutex-protected singly linked list of reusable buffers. */
+    pthread_mutex_t lock; /* held by push/pop while they touch buf */
+    Dav1dMemPoolBuffer *buf; /* head of the list; NULL when the pool is empty */
+} Dav1dMemPool;
+
+void dav1d_mem_pool_push(Dav1dMemPool *pool, Dav1dMemPoolBuffer *buf);
+Dav1dMemPoolBuffer *dav1d_mem_pool_pop(Dav1dMemPool *pool, size_t size);
+void dav1d_mem_pool_destroy(Dav1dMemPool *pool);
+
+/*
+ * Allocate align-byte aligned memory. The return value can be released
+ * by calling the dav1d_free_aligned() function.
+ */
+static inline void *dav1d_alloc_aligned(size_t sz, size_t align) { /* Exactly one backend below is compiled in, chosen by the HAVE_* configuration macros. */
+    assert(!(align & (align - 1))); /* align must have at most one bit set, i.e. be a power of two */
+#ifdef HAVE_POSIX_MEMALIGN
+    void *ptr;
+    if (posix_memalign(&ptr, align, sz)) return NULL; /* a non-zero return value signals failure */
+    return ptr;
+#elif defined(HAVE_ALIGNED_MALLOC)
+    return _aligned_malloc(sz, align); /* note the argument order: size first, then alignment */
+#elif defined(HAVE_MEMALIGN)
+    return memalign(align, sz); /* note the argument order: alignment first, then size */
+#else
+#error Missing aligned alloc implementation
+#endif
+}
+
+static inline void dav1d_free_aligned(void* ptr) { /* Release memory obtained from dav1d_alloc_aligned(), using the deallocator that matches the backend. */
+#ifdef HAVE_POSIX_MEMALIGN
+    free(ptr);
+#elif defined(HAVE_ALIGNED_MALLOC)
+    _aligned_free(ptr); /* counterpart of the _aligned_malloc() call in dav1d_alloc_aligned() */
+#elif defined(HAVE_MEMALIGN)
+    free(ptr);
+#endif
+}
+
+static inline void dav1d_freep_aligned(void* ptr) { /* ptr is the address of a pointer: free the aligned memory it refers to and reset it to NULL. */
+    void **mem = (void **) ptr; /* declared void* so callers can pass the address of any pointer type without a cast */
+    if (*mem) {
+        dav1d_free_aligned(*mem);
+        *mem = NULL; /* clear the caller's pointer so a repeated call is a no-op */
+    }
+}
+
+static inline void freep(void *ptr) { /* ptr is the address of a pointer: free() the memory it refers to and reset it to NULL. */
+    void **mem = (void **) ptr;
+    if (*mem) {
+        free(*mem); /* plain free(): use dav1d_freep_aligned() for memory from dav1d_alloc_aligned() */
+        *mem = NULL; /* clear the caller's pointer so a repeated call is a no-op */
+    }
+}
+
+#endif /* DAV1D_SRC_MEM_H */
--- a/src/meson.build
+++ b/src/meson.build
@@ -38,6 +38,7 @@
     'itx_1d.c',
     'lf_mask.c',
     'log.c',
+    'mem.c',
     'msac.c',
     'obu.c',
     'picture.c',
--- a/src/obu.c
+++ b/src/obu.c
@@ -1234,7 +1234,8 @@
 
     switch (type) {
     case DAV1D_OBU_SEQ_HDR: {
-        Dav1dRef *ref = dav1d_ref_create(sizeof(Dav1dSequenceHeader));
+        Dav1dRef *ref = dav1d_ref_create_using_pool(&c->seq_hdr_pool,
+                                                    sizeof(Dav1dSequenceHeader));
         if (!ref) return DAV1D_ERR(ENOMEM);
         Dav1dSequenceHeader *seq_hdr = ref->data;
         memset(seq_hdr, 0, sizeof(*seq_hdr));
@@ -1280,7 +1281,8 @@
         if (global) break;
         if (!c->seq_hdr) goto error;
         if (!c->frame_hdr_ref) {
-            c->frame_hdr_ref = dav1d_ref_create(sizeof(Dav1dFrameHeader));
+            c->frame_hdr_ref = dav1d_ref_create_using_pool(&c->frame_hdr_pool,
+                                                           sizeof(Dav1dFrameHeader));
             if (!c->frame_hdr_ref) return DAV1D_ERR(ENOMEM);
         }
 #ifndef NDEBUG
--- a/src/picture.c
+++ b/src/picture.c
@@ -34,7 +34,6 @@
 #include <string.h>
 
 #include "common/intops.h"
-#include "common/mem.h"
 #include "common/validate.h"
 
 #include "src/internal.h"
@@ -45,7 +44,7 @@
 #include "src/thread_task.h"
 
 int dav1d_default_picture_alloc(Dav1dPicture *const p, void *const cookie) {
-    assert(sizeof(Dav1dPictureBuffer) <= DAV1D_PICTURE_ALIGNMENT);
+    assert(sizeof(Dav1dMemPoolBuffer) <= DAV1D_PICTURE_ALIGNMENT);
     const int hbd = p->p.bpc > 8;
     const int aligned_w = (p->p.w + 127) & ~127;
     const int aligned_h = (p->p.h + 127) & ~127;
@@ -69,30 +68,13 @@
     const size_t uv_sz = uv_stride * (aligned_h >> ss_ver);
     const size_t pic_size = y_sz + 2 * uv_sz;
 
-    /* Pop buffer from the pool. */
-    Dav1dContext *const c = cookie;
-    pthread_mutex_lock(&c->picture_buffer_pool.lock);
-    Dav1dPictureBuffer *buf = c->picture_buffer_pool.buf;
-    uint8_t *data;
-    if (buf) {
-        c->picture_buffer_pool.buf = buf->next;
-        pthread_mutex_unlock(&c->picture_buffer_pool.lock);
-        data = buf->data;
-        if ((uintptr_t)buf - (uintptr_t)data != pic_size) {
-            dav1d_free_aligned(data);
-            goto alloc;
-        }
-    } else {
-        pthread_mutex_unlock(&c->picture_buffer_pool.lock);
-alloc:
-        data = dav1d_alloc_aligned(pic_size + DAV1D_PICTURE_ALIGNMENT,
-                                   DAV1D_PICTURE_ALIGNMENT);
-        if (!data) return DAV1D_ERR(ENOMEM);
-        buf = (Dav1dPictureBuffer*)(data + pic_size);
-        buf->data = data;
-    }
+    Dav1dMemPoolBuffer *const buf = dav1d_mem_pool_pop(cookie, pic_size +
+                                                       DAV1D_PICTURE_ALIGNMENT -
+                                                       sizeof(Dav1dMemPoolBuffer));
+    if (!buf) return DAV1D_ERR(ENOMEM);
     p->allocator_data = buf;
 
+    uint8_t *const data = buf->data;
     p->data[0] = data;
     p->data[1] = has_chroma ? data + y_sz : NULL;
     p->data[2] = has_chroma ? data + y_sz + uv_sz : NULL;
@@ -101,13 +83,7 @@
 }
 
 void dav1d_default_picture_release(Dav1dPicture *const p, void *const cookie) {
-    /* Push buffer to the pool. */
-    Dav1dContext *const c = cookie;
-    Dav1dPictureBuffer *const buf = p->allocator_data;
-    pthread_mutex_lock(&c->picture_buffer_pool.lock);
-    buf->next = c->picture_buffer_pool.buf;
-    c->picture_buffer_pool.buf = buf;
-    pthread_mutex_unlock(&c->picture_buffer_pool.lock);
+    dav1d_mem_pool_push(cookie, p->allocator_data);
 }
 
 struct pic_ctx_context {
--- a/src/recon_tmpl.c
+++ b/src/recon_tmpl.c
@@ -34,7 +34,6 @@
 #include "common/bitdepth.h"
 #include "common/dump.h"
 #include "common/intops.h"
-#include "common/mem.h"
 
 #include "src/cdef_apply.h"
 #include "src/ctx.h"
--- a/src/ref.c
+++ b/src/ref.c
@@ -27,8 +27,6 @@
 
 #include "config.h"
 
-#include "common/mem.h"
-
 #include "src/ref.h"
 
 static void default_free_callback(const uint8_t *const data, void *const user_data) {
@@ -36,19 +34,43 @@
     dav1d_free_aligned(user_data);
 }
 
-Dav1dRef *dav1d_ref_create(const size_t size) {
-    void *data = dav1d_alloc_aligned(size, 32);
+Dav1dRef *dav1d_ref_create(size_t size) {
+    size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1);
+
+    uint8_t *const data = dav1d_alloc_aligned(size + sizeof(Dav1dRef), 64);
     if (!data) return NULL;
 
-    Dav1dRef *const res = dav1d_ref_wrap(data, default_free_callback, data);
-    if (res)
-        res->data = data;
-    else
-        dav1d_free_aligned(data);
+    Dav1dRef *const res = (Dav1dRef*)(data + size);
+    res->const_data = res->user_data = res->data = data;
+    atomic_init(&res->ref_cnt, 1);
+    res->free_ref = 0;
+    res->free_callback = default_free_callback;
 
     return res;
 }
 
+static void pool_free_callback(const uint8_t *const data, void *const user_data) { /* free_callback for pool-backed refs: returns the buffer to its pool instead of freeing it. */
+    dav1d_mem_pool_push((Dav1dMemPool*)data, user_data); /* data is the ref's const_data (the pool) and user_data its Dav1dMemPoolBuffer, as set in dav1d_ref_create_using_pool() */
+}
+
+Dav1dRef *dav1d_ref_create_using_pool(Dav1dMemPool *const pool, size_t size) { /* Create a ref with size bytes of data taken from pool; the buffer goes back to pool on release. Returns NULL on allocation failure. */
+    size = (size + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* round up to a multiple of sizeof(void*) */
+
+    Dav1dMemPoolBuffer *const buf =
+        dav1d_mem_pool_pop(pool, size + sizeof(Dav1dRef)); /* request room for the payload plus the Dav1dRef itself */
+    if (!buf) return NULL;
+
+    Dav1dRef *const res = &((Dav1dRef*)buf)[-1]; /* the Dav1dRef occupies the sizeof(Dav1dRef) bytes immediately before the pool header */
+    res->data = buf->data;
+    res->const_data = pool; /* not the payload: this is how pool_free_callback() receives the pool */
+    atomic_init(&res->ref_cnt, 1);
+    res->free_ref = 0; /* the ref is embedded in the pooled buffer, so dav1d_ref_dec() must not free() it */
+    res->free_callback = pool_free_callback;
+    res->user_data = buf;
+
+    return res;
+}
+
 Dav1dRef *dav1d_ref_wrap(const uint8_t *const ptr,
                          void (*free_callback)(const uint8_t *data, void *user_data),
                          void *const user_data)
@@ -59,6 +81,7 @@
     res->data = NULL;
     res->const_data = ptr;
     atomic_init(&res->ref_cnt, 1);
+    res->free_ref = 1;
     res->free_callback = free_callback;
     res->user_data = user_data;
 
@@ -76,8 +99,9 @@
     if (!ref) return;
 
     if (atomic_fetch_sub(&ref->ref_cnt, 1) == 1) {
+        const int free_ref = ref->free_ref;
         ref->free_callback(ref->const_data, ref->user_data);
-        free(ref);
+        if (free_ref) free(ref);
     }
     *pref = NULL;
 }
--- a/src/ref.h
+++ b/src/ref.h
@@ -30,6 +30,9 @@
 
 #include "dav1d/dav1d.h"
 
+#include "src/mem.h"
+#include "src/thread.h"
+
 #include <stdatomic.h>
 #include <stddef.h>
 
@@ -37,11 +40,13 @@
     void *data;
     const void *const_data;
     atomic_int ref_cnt;
+    int free_ref;
     void (*free_callback)(const uint8_t *data, void *user_data);
     void *user_data;
 };
 
 Dav1dRef *dav1d_ref_create(size_t size);
+Dav1dRef *dav1d_ref_create_using_pool(Dav1dMemPool *pool, size_t size);
 Dav1dRef *dav1d_ref_wrap(const uint8_t *ptr,
                          void (*free_callback)(const uint8_t *data, void *user_data),
                          void *user_data);