Reduce memory arena contention

Previously there was one memory arena for all threads, making it
the bottleneck for multi-threaded performance. As the number of
threads increased, the contention for the lock on the arena would
grow, causing other threads to wait to acquire it.

This commit makes it use 8 memory arenas, and round-robins how
they are assigned to threads. Threads keep track of the index that
they should use into the arena array, assigned the first time the
arena is accessed on a given thread.

When an image is first created, it is allocated from an arena.
When the logic to have multiple arenas is enabled, it then keeps
track of the index on the image, so that when deleted it can be
returned to the correct arena.

Effectively this means that in single-threaded programs, this
should not really have an effect. We also do not do this logic if
the GIL is enabled, as it effectively acts as the lock on the
default arena for us.

As expected, this approach has no real noticeable effect on regular
CPython. On free-threaded CPython, however, there is a massive
difference (measuring up to about 70%).
This commit is contained in:
Kevin Newton 2025-01-24 14:14:33 -05:00
parent 51df14282f
commit fdd80169fd
4 changed files with 288 additions and 67 deletions

View File

@ -8,18 +8,21 @@
# ------------------------------ # ------------------------------
from __future__ import annotations from __future__ import annotations
import distutils.ccompiler
import os import os
import re import re
import shutil import shutil
import struct import struct
import subprocess import subprocess
import sys import sys
import tempfile
import warnings import warnings
from collections.abc import Iterator from collections.abc import Iterator
from typing import Any from typing import Any
from setuptools import Extension, setup from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
from setuptools.errors import CompileError
def get_version() -> str: def get_version() -> str:
@ -292,6 +295,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None:
return None return None
def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool:
try:
with tempfile.TemporaryDirectory() as d:
fn = os.path.join(d, "test.c")
with open(fn, "w") as f:
f.write(code)
compiler.compile([fn], output_dir=d, extra_preargs=["-Werror"])
return True
except CompileError:
return False
def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool:
code = f"""
#pragma GCC diagnostic error "-Wattributes"
#pragma clang diagnostic error "-Wattributes"
int {attr} foo;
int main() {{
return 0;
}}
"""
return _try_compile(compiler, code)
def _try_compile_tls_define_macros(
compiler: distutils.ccompiler.CCompiler,
) -> list[tuple[str, str | None]]:
if _try_compile_attr(compiler, "thread_local"): # C23
return [("HAVE_THREAD_LOCAL", None)]
elif _try_compile_attr(compiler, "_Thread_local"): # C11/C17
return [("HAVE__THREAD_LOCAL", None)]
elif _try_compile_attr(compiler, "__thread"): # GCC/clang
return [("HAVE___THREAD", None)]
elif _try_compile_attr(compiler, "__declspec(thread)"): # MSVC
return [("HAVE___DECLSPEC_THREAD_", None)]
else:
return []
class pil_build_ext(build_ext): class pil_build_ext(build_ext):
class ext_feature: class ext_feature:
features = [ features = [
@ -426,12 +470,13 @@ class pil_build_ext(build_ext):
def _update_extension( def _update_extension(
self, self,
name: str, name: str,
libraries: list[str] | list[str | bool | None], libraries: list[str] | list[str | bool | None] | None = None,
define_macros: list[tuple[str, str | None]] | None = None, define_macros: list[tuple[str, str | None]] | None = None,
sources: list[str] | None = None, sources: list[str] | None = None,
) -> None: ) -> None:
for extension in self.extensions: for extension in self.extensions:
if extension.name == name: if extension.name == name:
if libraries is not None:
extension.libraries += libraries extension.libraries += libraries
if define_macros is not None: if define_macros is not None:
extension.define_macros += define_macros extension.define_macros += define_macros
@ -890,7 +935,10 @@ class pil_build_ext(build_ext):
defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"')) defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"'))
self._update_extension("PIL._imaging", libs, defs) tls_define_macros = _try_compile_tls_define_macros(self.compiler)
self._update_extension("PIL._imaging", libs, defs + tls_define_macros)
self._update_extension("PIL._imagingmath", define_macros=tls_define_macros)
self._update_extension("PIL._imagingmorph", define_macros=tls_define_macros)
# #
# additional libraries # additional libraries
@ -913,7 +961,9 @@ class pil_build_ext(build_ext):
libs.append(feature.get("fribidi")) libs.append(feature.get("fribidi"))
else: # building FriBiDi shim from src/thirdparty else: # building FriBiDi shim from src/thirdparty
srcs.append("src/thirdparty/fribidi-shim/fribidi.c") srcs.append("src/thirdparty/fribidi-shim/fribidi.c")
self._update_extension("PIL._imagingft", libs, defs, srcs) self._update_extension(
"PIL._imagingft", libs, defs + tls_define_macros, srcs
)
else: else:
self._remove_extension("PIL._imagingft") self._remove_extension("PIL._imagingft")
@ -922,19 +972,19 @@ class pil_build_ext(build_ext):
libs = [feature.get("lcms")] libs = [feature.get("lcms")]
if sys.platform == "win32": if sys.platform == "win32":
libs.extend(["user32", "gdi32"]) libs.extend(["user32", "gdi32"])
self._update_extension("PIL._imagingcms", libs) self._update_extension("PIL._imagingcms", libs, tls_define_macros)
else: else:
self._remove_extension("PIL._imagingcms") self._remove_extension("PIL._imagingcms")
webp = feature.get("webp") webp = feature.get("webp")
if isinstance(webp, str): if isinstance(webp, str):
libs = [webp, webp + "mux", webp + "demux"] libs = [webp, webp + "mux", webp + "demux"]
self._update_extension("PIL._webp", libs) self._update_extension("PIL._webp", libs, tls_define_macros)
else: else:
self._remove_extension("PIL._webp") self._remove_extension("PIL._webp")
tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else [] tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else []
self._update_extension("PIL._imagingtk", tk_libs) self._update_extension("PIL._imagingtk", tk_libs, tls_define_macros)
build_ext.build_extensions(self) build_ext.build_extensions(self)

View File

@ -3938,34 +3938,50 @@ _get_stats(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); long stats_new_count = 0;
ImagingMemoryArena arena = &ImagingDefaultArena; long stats_allocated_blocks = 0;
long stats_reused_blocks = 0;
long stats_reallocated_blocks = 0;
long stats_freed_blocks = 0;
long blocks_cached = 0;
v = PyLong_FromLong(arena->stats_new_count); uint8_t index;
ImagingMemoryArena arena;
IMAGING_ARENAS_FOREACH(index, arena) {
MUTEX_LOCK(&arena->mutex);
stats_new_count += arena->stats_new_count;
stats_allocated_blocks += arena->stats_allocated_blocks;
stats_reused_blocks += arena->stats_reused_blocks;
stats_reallocated_blocks += arena->stats_reallocated_blocks;
stats_freed_blocks += arena->stats_freed_blocks;
blocks_cached += arena->blocks_cached;
MUTEX_UNLOCK(&arena->mutex);
}
v = PyLong_FromLong(stats_new_count);
PyDict_SetItemString(d, "new_count", v ? v : Py_None); PyDict_SetItemString(d, "new_count", v ? v : Py_None);
Py_XDECREF(v); Py_XDECREF(v);
v = PyLong_FromLong(arena->stats_allocated_blocks); v = PyLong_FromLong(stats_allocated_blocks);
PyDict_SetItemString(d, "allocated_blocks", v ? v : Py_None); PyDict_SetItemString(d, "allocated_blocks", v ? v : Py_None);
Py_XDECREF(v); Py_XDECREF(v);
v = PyLong_FromLong(arena->stats_reused_blocks); v = PyLong_FromLong(stats_reused_blocks);
PyDict_SetItemString(d, "reused_blocks", v ? v : Py_None); PyDict_SetItemString(d, "reused_blocks", v ? v : Py_None);
Py_XDECREF(v); Py_XDECREF(v);
v = PyLong_FromLong(arena->stats_reallocated_blocks); v = PyLong_FromLong(stats_reallocated_blocks);
PyDict_SetItemString(d, "reallocated_blocks", v ? v : Py_None); PyDict_SetItemString(d, "reallocated_blocks", v ? v : Py_None);
Py_XDECREF(v); Py_XDECREF(v);
v = PyLong_FromLong(arena->stats_freed_blocks); v = PyLong_FromLong(stats_freed_blocks);
PyDict_SetItemString(d, "freed_blocks", v ? v : Py_None); PyDict_SetItemString(d, "freed_blocks", v ? v : Py_None);
Py_XDECREF(v); Py_XDECREF(v);
v = PyLong_FromLong(arena->blocks_cached); v = PyLong_FromLong(blocks_cached);
PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None); PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None);
Py_XDECREF(v); Py_XDECREF(v);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
return d; return d;
} }
@ -3975,14 +3991,17 @@ _reset_stats(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); uint8_t index;
ImagingMemoryArena arena = &ImagingDefaultArena; ImagingMemoryArena arena;
IMAGING_ARENAS_FOREACH(index, arena) {
MUTEX_LOCK(&arena->mutex);
arena->stats_new_count = 0; arena->stats_new_count = 0;
arena->stats_allocated_blocks = 0; arena->stats_allocated_blocks = 0;
arena->stats_reused_blocks = 0; arena->stats_reused_blocks = 0;
arena->stats_reallocated_blocks = 0; arena->stats_reallocated_blocks = 0;
arena->stats_freed_blocks = 0; arena->stats_freed_blocks = 0;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); MUTEX_UNLOCK(&arena->mutex);
}
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
@ -3994,9 +4013,10 @@ _get_alignment(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); ImagingMemoryArena arena = ImagingGetArena();
int alignment = ImagingDefaultArena.alignment; MUTEX_LOCK(&arena->mutex);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); int alignment = arena->alignment;
MUTEX_UNLOCK(&arena->mutex);
return PyLong_FromLong(alignment); return PyLong_FromLong(alignment);
} }
@ -4006,9 +4026,10 @@ _get_block_size(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); ImagingMemoryArena arena = ImagingGetArena();
int block_size = ImagingDefaultArena.block_size; MUTEX_LOCK(&arena->mutex);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); int block_size = arena->block_size;
MUTEX_UNLOCK(&arena->mutex);
return PyLong_FromLong(block_size); return PyLong_FromLong(block_size);
} }
@ -4018,9 +4039,10 @@ _get_blocks_max(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); ImagingMemoryArena arena = ImagingGetArena();
int blocks_max = ImagingDefaultArena.blocks_max; MUTEX_LOCK(&arena->mutex);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); int blocks_max = arena->blocks_max;
MUTEX_UNLOCK(&arena->mutex);
return PyLong_FromLong(blocks_max); return PyLong_FromLong(blocks_max);
} }
@ -4041,9 +4063,13 @@ _set_alignment(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); uint8_t index;
ImagingDefaultArena.alignment = alignment; ImagingMemoryArena arena;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENAS_FOREACH(index, arena) {
MUTEX_LOCK(&arena->mutex);
arena->alignment = alignment;
MUTEX_UNLOCK(&arena->mutex);
}
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
@ -4066,9 +4092,13 @@ _set_block_size(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); uint8_t index;
ImagingDefaultArena.block_size = block_size; ImagingMemoryArena arena;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENAS_FOREACH(index, arena) {
MUTEX_LOCK(&arena->mutex);
arena->block_size = block_size;
MUTEX_UNLOCK(&arena->mutex);
}
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
@ -4087,15 +4117,21 @@ _set_blocks_max(PyObject *self, PyObject *args) {
} }
if ((unsigned long)blocks_max > if ((unsigned long)blocks_max >
SIZE_MAX / sizeof(ImagingDefaultArena.blocks_pool[0])) { SIZE_MAX / sizeof(ImagingGetArena()->blocks_pool[0])) {
PyErr_SetString(PyExc_ValueError, "blocks_max is too large"); PyErr_SetString(PyExc_ValueError, "blocks_max is too large");
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); int status = 0;
int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max); uint8_t index;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); ImagingMemoryArena arena;
if (!status) { IMAGING_ARENAS_FOREACH(index, arena) {
MUTEX_LOCK(&arena->mutex);
status |= ImagingMemorySetBlocksMax(arena, blocks_max);
MUTEX_UNLOCK(&arena->mutex);
}
if (status) {
return ImagingError_MemoryError(); return ImagingError_MemoryError();
} }
@ -4111,9 +4147,13 @@ _clear_cache(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); uint8_t index;
ImagingMemoryClearCache(&ImagingDefaultArena, i); ImagingMemoryArena arena;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENAS_FOREACH(index, arena) {
MUTEX_LOCK(&arena->mutex);
ImagingMemoryClearCache(arena, i);
MUTEX_UNLOCK(&arena->mutex);
}
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;

View File

@ -51,6 +51,20 @@ extern "C" {
* extensions, see http://www.effbot.org/zone/pil-extending.htm * extensions, see http://www.effbot.org/zone/pil-extending.htm
*/ */
#ifdef Py_GIL_DISABLED
#if defined(__cplusplus)
#define IMAGING_TLS thread_local
#elif defined(HAVE_THREAD_LOCAL)
#define IMAGING_TLS thread_local
#elif defined(HAVE__THREAD_LOCAL)
#define IMAGING_TLS _Thread_local
#elif defined(HAVE___THREAD)
#define IMAGING_TLS __thread
#elif defined(HAVE___DECLSPEC_THREAD_)
#define IMAGING_TLS __declspec(thread)
#endif
#endif
/* Handles */ /* Handles */
typedef struct ImagingMemoryInstance *Imaging; typedef struct ImagingMemoryInstance *Imaging;
@ -104,6 +118,10 @@ struct ImagingMemoryInstance {
/* Virtual methods */ /* Virtual methods */
void (*destroy)(Imaging im); void (*destroy)(Imaging im);
#ifdef IMAGING_TLS
int arenaindex; /* Index of the arena this image is associated with. */
#endif
}; };
#define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)])
@ -161,6 +179,9 @@ typedef struct ImagingMemoryArena {
int stats_reallocated_blocks; /* Number of blocks which were actually reallocated int stats_reallocated_blocks; /* Number of blocks which were actually reallocated
after retrieving */ after retrieving */
int stats_freed_blocks; /* Number of freed blocks */ int stats_freed_blocks; /* Number of freed blocks */
#ifdef IMAGING_TLS
int index; /* Index of the arena in the global array. */
#endif
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
PyMutex mutex; PyMutex mutex;
#endif #endif
@ -169,7 +190,34 @@ typedef struct ImagingMemoryArena {
/* Objects */ /* Objects */
/* ------- */ /* ------- */
#ifdef IMAGING_TLS
/* In this case we both do not have the GIL and have thread-local storage, so we
 * will allocate a set of arenas and associate them with threads one at a time.
*/
#define IMAGING_ARENAS_COUNT 8
extern struct ImagingMemoryArena ImagingArenas[IMAGING_ARENAS_COUNT];
/* Provide a macro that loops through each arena that has been
* statically-allocated. This is necessary to properly handle stats.
*/
#define IMAGING_ARENAS_FOREACH(index, arena) \
for (index = 0, (arena) = &ImagingArenas[index]; index < IMAGING_ARENAS_COUNT; (arena) = &ImagingArenas[++index])
#else
/* In this case we either have the GIL or do not have thread-local storage, in
* which case we will only allocate a single arena.
*/
extern struct ImagingMemoryArena ImagingDefaultArena; extern struct ImagingMemoryArena ImagingDefaultArena;
/* Provide a macro that loops through each arena that has been
* statically-allocated. In this case because there is only one, this is
* effectively a single block of code.
*/
#define IMAGING_ARENAS_FOREACH(index, arena) \
for ((void) index, (arena) = &ImagingDefaultArena; (arena); (arena) = NULL)
#endif
ImagingMemoryArena ImagingGetArena(void);
extern int extern int
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max); ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max);
extern void extern void

View File

@ -218,9 +218,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
break; break;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); ImagingMemoryArena arena = ImagingGetArena();
ImagingDefaultArena.stats_new_count += 1; MUTEX_LOCK(&arena->mutex);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); arena->stats_new_count += 1;
MUTEX_UNLOCK(&arena->mutex);
return im; return im;
} }
@ -258,10 +259,65 @@ ImagingDelete(Imaging im) {
/* Allocate image as an array of line buffers. */ /* Allocate image as an array of line buffers. */
#define IMAGING_PAGE_SIZE (4096) #define IMAGING_PAGE_SIZE (4096)
#define IMAGING_ARENA_BLOCK_SIZE (16 * 1024 * 1024)
#ifdef IMAGING_TLS
/* This is the overall process-level index that keeps track of the next index
* that will be assigned to a thread.
*/
static uint64_t ImagingArenaIndex = UINT64_MAX;
/* This is the thread-local index that associates a thread with an arena in the
* statically-allocated list.
*/
static IMAGING_TLS uint64_t ImagingArenaThreadIndex = UINT64_MAX;
/* These are the statically-allocated arenas. */
struct ImagingMemoryArena ImagingArenas[IMAGING_ARENAS_COUNT] = {
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 0, {0} },
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 1, {0} },
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 2, {0} },
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 3, {0} },
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 4, {0} },
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 5, {0} },
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 6, {0} },
{ 1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 7, {0} }
};
/* Get a pointer to the correct arena for this context. In this case where we
* are using a round-robin approach to the statically allocated arenas, we will
* return the arena that is assigned to the thread on first use.
*/
ImagingMemoryArena ImagingGetArena(void) {
if (ImagingArenaThreadIndex == UINT64_MAX) {
ImagingArenaThreadIndex = _Py_atomic_add_uint64(&ImagingArenaIndex, 1) % IMAGING_ARENAS_COUNT;
}
return &ImagingArenas[ImagingArenaThreadIndex];
}
/* Return the arena associated with the given image. In this case the index of
* the arena is stored on the image itself.
*/
ImagingMemoryArena ImagingGetArenaFromImaging(Imaging im) {
int arenaindex = im->arenaindex;
assert(arenaindex >= 0 && arenaindex < IMAGING_ARENAS_COUNT);
return &ImagingArenas[arenaindex];
}
/* Set the arena index on the given image based on the index of the arena. This
* is necessary in order to return the blocks to the correct arena when the
* image is destroyed.
*/
static void ImagingSetArenaOnImaging(Imaging im, ImagingMemoryArena arena) {
im->arenaindex = arena->index;
}
#else
/* Because we have the GIL (or do not have thread-local storage), we only have a
* single arena.
*/
struct ImagingMemoryArena ImagingDefaultArena = { struct ImagingMemoryArena ImagingDefaultArena = {
1, // alignment 1, // alignment
16 * 1024 * 1024, // block_size IMAGING_ARENA_BLOCK_SIZE, // block_size
0, // blocks_max 0, // blocks_max
0, // blocks_cached 0, // blocks_cached
NULL, // blocks_pool NULL, // blocks_pool
@ -271,10 +327,33 @@ struct ImagingMemoryArena ImagingDefaultArena = {
0, 0,
0, // Stats 0, // Stats
#ifdef Py_GIL_DISABLED #ifdef Py_GIL_DISABLED
/* On the very off-chance that someone is running free-threaded Python on a
* platform that does not support thread-local storage, we need a mutex
* here.
*/
{0}, {0},
#endif #endif
}; };
/* Get a pointer to the correct arena for this context. In this case where we
* either have the GIL or we do not have TLS, we will return only the default
* arena.
*/
ImagingMemoryArena ImagingGetArena(void) {
return &ImagingDefaultArena;
}
/* Return the arena associated with the given image. In this case because we
* only have one arena, we always return the default arena.
*/
#define ImagingGetArenaFromImaging(im) &ImagingDefaultArena
/* Set the arena index on the given image based on the index of the arena. In
* this case because we only have one arena, we do not need to do anything.
*/
#define ImagingSetArenaOnImaging(im, arena)
#endif
int int
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) { ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) {
void *p; void *p;
@ -288,18 +367,18 @@ ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) {
p = realloc(arena->blocks_pool, sizeof(*arena->blocks_pool) * blocks_max); p = realloc(arena->blocks_pool, sizeof(*arena->blocks_pool) * blocks_max);
if (!p) { if (!p) {
// Leave previous blocks_max value // Leave previous blocks_max value
return 0; return 1;
} }
arena->blocks_pool = p; arena->blocks_pool = p;
} else { } else {
arena->blocks_pool = calloc(sizeof(*arena->blocks_pool), blocks_max); arena->blocks_pool = calloc(sizeof(*arena->blocks_pool), blocks_max);
if (!arena->blocks_pool) { if (!arena->blocks_pool) {
return 0; return 1;
} }
} }
arena->blocks_max = blocks_max; arena->blocks_max = blocks_max;
return 1; return 0;
} }
void void
@ -369,12 +448,13 @@ ImagingDestroyArray(Imaging im) {
int y = 0; int y = 0;
if (im->blocks) { if (im->blocks) {
MUTEX_LOCK(&ImagingDefaultArena.mutex); ImagingMemoryArena arena = ImagingGetArenaFromImaging(im);
MUTEX_LOCK(&arena->mutex);
while (im->blocks[y].ptr) { while (im->blocks[y].ptr) {
memory_return_block(&ImagingDefaultArena, im->blocks[y]); memory_return_block(arena, im->blocks[y]);
y += 1; y += 1;
} }
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); MUTEX_UNLOCK(&arena->mutex);
free(im->blocks); free(im->blocks);
} }
} }
@ -504,11 +584,14 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); ImagingMemoryArena arena = ImagingGetArena();
ImagingSetArenaOnImaging(im, arena);
MUTEX_LOCK(&arena->mutex);
Imaging tmp = ImagingAllocateArray( Imaging tmp = ImagingAllocateArray(
im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size im, arena, dirty, arena->block_size
); );
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); MUTEX_UNLOCK(&arena->mutex);
if (tmp) { if (tmp) {
return im; return im;
} }
@ -516,9 +599,9 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
ImagingError_Clear(); ImagingError_Clear();
// Try to allocate the image once more with smallest possible block size // Try to allocate the image once more with smallest possible block size
MUTEX_LOCK(&ImagingDefaultArena.mutex); MUTEX_LOCK(&arena->mutex);
tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE); tmp = ImagingAllocateArray(im, arena, dirty, IMAGING_PAGE_SIZE);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); MUTEX_UNLOCK(&arena->mutex);
if (tmp) { if (tmp) {
return im; return im;
} }