shithub: dav1d

Download patch

ref: 9057d286bedbbf9e3b73e0e3d7e056c4cd149fd7
parent: 920079edb1ead2f2cbeaf46fd6643ebb344b8b6b
author: Henrik Gramner <gramner@twoorioles.com>
date: Mon Nov 16 08:58:37 EST 2020

Add a picture buffer pool

Reuse buffers allocated for picture data instead of constantly
freeing and allocating new ones.

The impact of this can vary significantly between different systems;
in particular, it's highly beneficial on Windows, where it can result
in an overall performance increase of up to 10% in some cases.

--- a/src/internal.h
+++ b/src/internal.h
@@ -135,6 +135,11 @@
     int drain;
 
     Dav1dLogger logger;
+
+    struct {
+        pthread_mutex_t lock;
+        Dav1dPictureBuffer *buf;
+    } picture_buffer_pool;
 };
 
 struct Dav1dFrameContext {
--- a/src/lib.c
+++ b/src/lib.c
@@ -129,6 +129,11 @@
     c->all_layers = s->all_layers;
     c->frame_size_limit = s->frame_size_limit;
 
+    if (c->allocator.alloc_picture_callback == dav1d_default_picture_alloc) {
+        if (pthread_mutex_init(&c->picture_buffer_pool.lock, NULL)) goto error;
+        c->allocator.cookie = c;
+    }
+
     /* On 32-bit systems extremely large frame sizes can cause overflows in
      * dav1d_decode_frame() malloc size calculations. Prevent that from occuring
      * by enforcing a maximum frame size limit, chosen to roughly correspond to
@@ -571,6 +576,14 @@
     dav1d_ref_dec(&c->mastering_display_ref);
     dav1d_ref_dec(&c->content_light_ref);
     dav1d_ref_dec(&c->itut_t35_ref);
+
+    pthread_mutex_destroy(&c->picture_buffer_pool.lock);
+    Dav1dPictureBuffer *buf = c->picture_buffer_pool.buf;
+    while (buf) {
+        Dav1dPictureBuffer *const next = buf->next;
+        dav1d_free_aligned(buf->data);
+        buf = next;
+    }
 
     dav1d_freep_aligned(c_out);
 }
--- a/src/picture.c
+++ b/src/picture.c
@@ -45,7 +45,7 @@
 #include "src/thread_task.h"
 
 int dav1d_default_picture_alloc(Dav1dPicture *const p, void *const cookie) {
-    assert(cookie == NULL);
+    assert(sizeof(Dav1dPictureBuffer) <= DAV1D_PICTURE_ALIGNMENT);
     const int hbd = p->p.bpc > 8;
     const int aligned_w = (p->p.w + 127) & ~127;
     const int aligned_h = (p->p.h + 127) & ~127;
@@ -67,27 +67,47 @@
     p->stride[1] = uv_stride;
     const size_t y_sz = y_stride * aligned_h;
     const size_t uv_sz = uv_stride * (aligned_h >> ss_ver);
-    const size_t pic_size = y_sz + 2 * uv_sz + DAV1D_PICTURE_ALIGNMENT;
-    uint8_t *const data = dav1d_alloc_aligned(pic_size, DAV1D_PICTURE_ALIGNMENT);
-    if (!data) return DAV1D_ERR(ENOMEM);
+    const size_t pic_size = y_sz + 2 * uv_sz;
 
+    /* Pop buffer from the pool. */
+    Dav1dContext *const c = cookie;
+    pthread_mutex_lock(&c->picture_buffer_pool.lock);
+    Dav1dPictureBuffer *buf = c->picture_buffer_pool.buf;
+    uint8_t *data;
+    if (buf) {
+        c->picture_buffer_pool.buf = buf->next;
+        pthread_mutex_unlock(&c->picture_buffer_pool.lock);
+        data = buf->data;
+        if ((uintptr_t)buf - (uintptr_t)data != pic_size) {
+            dav1d_free_aligned(data);
+            goto alloc;
+        }
+    } else {
+        pthread_mutex_unlock(&c->picture_buffer_pool.lock);
+alloc:
+        data = dav1d_alloc_aligned(pic_size + DAV1D_PICTURE_ALIGNMENT,
+                                   DAV1D_PICTURE_ALIGNMENT);
+        if (!data) return DAV1D_ERR(ENOMEM);
+        buf = (Dav1dPictureBuffer*)(data + pic_size);
+        buf->data = data;
+    }
+    p->allocator_data = buf;
+
     p->data[0] = data;
     p->data[1] = has_chroma ? data + y_sz : NULL;
     p->data[2] = has_chroma ? data + y_sz + uv_sz : NULL;
 
-#ifndef NDEBUG /* safety check */
-    p->allocator_data = data;
-#endif
-
     return 0;
 }
 
 void dav1d_default_picture_release(Dav1dPicture *const p, void *const cookie) {
-    assert(cookie == NULL);
-#ifndef NDEBUG /* safety check */
-    assert(p->allocator_data == p->data[0]);
-#endif
-    dav1d_free_aligned(p->data[0]);
+    /* Push buffer to the pool. */
+    Dav1dContext *const c = cookie;
+    Dav1dPictureBuffer *const buf = p->allocator_data;
+    pthread_mutex_lock(&c->picture_buffer_pool.lock);
+    buf->next = c->picture_buffer_pool.buf;
+    c->picture_buffer_pool.buf = buf;
+    pthread_mutex_unlock(&c->picture_buffer_pool.lock);
 }
 
 struct pic_ctx_context {
--- a/src/picture.h
+++ b/src/picture.h
@@ -52,6 +52,11 @@
     atomic_uint *progress;
 } Dav1dThreadPicture;
 
+typedef struct Dav1dPictureBuffer {
+    void *data;
+    struct Dav1dPictureBuffer *next;
+} Dav1dPictureBuffer;
+
 /*
  * Allocate a picture with custom border size.
  */