Thread-local arenas

Currently, all threads use the same arena for imaging. This can
result in a lot of contention when there are enough workers and
the mutex is constantly being checked.

This commit instead introduces lockless thread-local arenas for
environments that support them.
This commit is contained in:
Kevin Newton 2025-01-09 12:15:36 -05:00
parent f521a4be7d
commit cfb2dcdfd0
4 changed files with 114 additions and 37 deletions

View File

@ -8,18 +8,21 @@
# ------------------------------ # ------------------------------
from __future__ import annotations from __future__ import annotations
import distutils.ccompiler
import os import os
import re import re
import shutil import shutil
import struct import struct
import subprocess import subprocess
import sys import sys
import tempfile
import warnings import warnings
from collections.abc import Iterator from collections.abc import Iterator
from typing import Any from typing import Any
from setuptools import Extension, setup from setuptools import Extension, setup
from setuptools.command.build_ext import build_ext from setuptools.command.build_ext import build_ext
from setuptools.errors import CompileError
def get_version() -> str: def get_version() -> str:
@ -292,6 +295,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None:
return None return None
def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool:
    """Report whether *compiler* can compile the C snippet *code*.

    The snippet is written to ``test.c`` inside a temporary directory and
    compiled there with ``-Werror`` passed as an extra pre-argument. The
    directory, including any build output, is removed before returning.

    :param compiler: The compiler object whose ``compile`` method is invoked.
    :param code: Source text to write to the probe file.
    :returns: ``True`` if ``compile`` completes, ``False`` if it raises
        ``CompileError``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        source_path = os.path.join(workdir, "test.c")
        with open(source_path, "w") as source_file:
            source_file.write(code)
        try:
            compiler.compile(
                [source_path], output_dir=workdir, extra_preargs=["-Werror"]
            )
        except CompileError:
            # A failed probe is an expected outcome, not a build error.
            return False
    return True
def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool:
    """Report whether *compiler* accepts *attr* in a variable declaration.

    Builds a small C program that declares ``int <attr> foo;`` and hands it
    to ``_try_compile``. The two pragmas ask GCC and clang to treat
    ``-Wattributes`` diagnostics as errors.

    :param compiler: The compiler object forwarded to ``_try_compile``.
    :param attr: Text inserted between ``int`` and the variable name.
    :returns: Whatever ``_try_compile`` returns for the generated program.
    """
    probe_lines = (
        "",
        '#pragma GCC diagnostic error "-Wattributes"',
        '#pragma clang diagnostic error "-Wattributes"',
        f"int {attr} foo;",
        "int main() {",
        "return 0;",
        "}",
        "",
    )
    return _try_compile(compiler, "\n".join(probe_lines))
def _try_compile_tls_define_macros(
    compiler: distutils.ccompiler.CCompiler,
) -> list[tuple[str, str | None]]:
    """Pick the define macro for the first thread-local keyword that compiles.

    Candidates are probed in order with ``_try_compile_attr`` and probing
    stops at the first success.

    :param compiler: The compiler object forwarded to ``_try_compile_attr``.
    :returns: A one-element list ``[(macro_name, None)]`` for the first
        accepted keyword, or an empty list if none is accepted.
    """
    # Order matters: earlier entries win when several keywords compile.
    candidates = (
        ("thread_local", "HAVE_THREAD_LOCAL"),  # C23
        ("_Thread_local", "HAVE__THREAD_LOCAL"),  # C11/C17
        ("__thread", "HAVE___THREAD"),  # GCC/clang
        ("__declspec(thread)", "HAVE___DECLSPEC_THREAD_"),  # MSVC
    )
    for keyword, macro_name in candidates:
        if _try_compile_attr(compiler, keyword):
            return [(macro_name, None)]
    return []
class pil_build_ext(build_ext): class pil_build_ext(build_ext):
class ext_feature: class ext_feature:
features = [ features = [
@ -426,13 +470,14 @@ class pil_build_ext(build_ext):
def _update_extension( def _update_extension(
self, self,
name: str, name: str,
libraries: list[str] | list[str | bool | None], libraries: list[str] | list[str | bool | None] | None = None,
define_macros: list[tuple[str, str | None]] | None = None, define_macros: list[tuple[str, str | None]] | None = None,
sources: list[str] | None = None, sources: list[str] | None = None,
) -> None: ) -> None:
for extension in self.extensions: for extension in self.extensions:
if extension.name == name: if extension.name == name:
extension.libraries += libraries if libraries is not None:
extension.libraries += libraries
if define_macros is not None: if define_macros is not None:
extension.define_macros += define_macros extension.define_macros += define_macros
if sources is not None: if sources is not None:
@ -890,7 +935,10 @@ class pil_build_ext(build_ext):
defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"')) defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"'))
self._update_extension("PIL._imaging", libs, defs) tls_define_macros = _try_compile_tls_define_macros(self.compiler)
self._update_extension("PIL._imaging", libs, defs + tls_define_macros)
self._update_extension("PIL._imagingmath", define_macros=tls_define_macros)
self._update_extension("PIL._imagingmorph", define_macros=tls_define_macros)
# #
# additional libraries # additional libraries
@ -913,7 +961,9 @@ class pil_build_ext(build_ext):
libs.append(feature.get("fribidi")) libs.append(feature.get("fribidi"))
else: # building FriBiDi shim from src/thirdparty else: # building FriBiDi shim from src/thirdparty
srcs.append("src/thirdparty/fribidi-shim/fribidi.c") srcs.append("src/thirdparty/fribidi-shim/fribidi.c")
self._update_extension("PIL._imagingft", libs, defs, srcs) self._update_extension(
"PIL._imagingft", libs, defs + tls_define_macros, srcs
)
else: else:
self._remove_extension("PIL._imagingft") self._remove_extension("PIL._imagingft")
@ -922,19 +972,19 @@ class pil_build_ext(build_ext):
libs = [feature.get("lcms")] libs = [feature.get("lcms")]
if sys.platform == "win32": if sys.platform == "win32":
libs.extend(["user32", "gdi32"]) libs.extend(["user32", "gdi32"])
self._update_extension("PIL._imagingcms", libs) self._update_extension("PIL._imagingcms", libs, tls_define_macros)
else: else:
self._remove_extension("PIL._imagingcms") self._remove_extension("PIL._imagingcms")
webp = feature.get("webp") webp = feature.get("webp")
if isinstance(webp, str): if isinstance(webp, str):
libs = [webp, webp + "mux", webp + "demux"] libs = [webp, webp + "mux", webp + "demux"]
self._update_extension("PIL._webp", libs) self._update_extension("PIL._webp", libs, tls_define_macros)
else: else:
self._remove_extension("PIL._webp") self._remove_extension("PIL._webp")
tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else [] tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else []
self._update_extension("PIL._imagingtk", tk_libs) self._update_extension("PIL._imagingtk", tk_libs, tls_define_macros)
build_ext.build_extensions(self) build_ext.build_extensions(self)

View File

@ -3938,7 +3938,7 @@ _get_stats(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingMemoryArena arena = &ImagingDefaultArena; ImagingMemoryArena arena = &ImagingDefaultArena;
v = PyLong_FromLong(arena->stats_new_count); v = PyLong_FromLong(arena->stats_new_count);
@ -3965,7 +3965,7 @@ _get_stats(PyObject *self, PyObject *args) {
PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None); PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None);
Py_XDECREF(v); Py_XDECREF(v);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return d; return d;
} }
@ -3975,14 +3975,14 @@ _reset_stats(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingMemoryArena arena = &ImagingDefaultArena; ImagingMemoryArena arena = &ImagingDefaultArena;
arena->stats_new_count = 0; arena->stats_new_count = 0;
arena->stats_allocated_blocks = 0; arena->stats_allocated_blocks = 0;
arena->stats_reused_blocks = 0; arena->stats_reused_blocks = 0;
arena->stats_reallocated_blocks = 0; arena->stats_reallocated_blocks = 0;
arena->stats_freed_blocks = 0; arena->stats_freed_blocks = 0;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
@ -3994,9 +3994,9 @@ _get_alignment(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int alignment = ImagingDefaultArena.alignment; int alignment = ImagingDefaultArena.alignment;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return PyLong_FromLong(alignment); return PyLong_FromLong(alignment);
} }
@ -4006,9 +4006,9 @@ _get_block_size(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int block_size = ImagingDefaultArena.block_size; int block_size = ImagingDefaultArena.block_size;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return PyLong_FromLong(block_size); return PyLong_FromLong(block_size);
} }
@ -4018,9 +4018,9 @@ _get_blocks_max(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int blocks_max = ImagingDefaultArena.blocks_max; int blocks_max = ImagingDefaultArena.blocks_max;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return PyLong_FromLong(blocks_max); return PyLong_FromLong(blocks_max);
} }
@ -4041,9 +4041,9 @@ _set_alignment(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingDefaultArena.alignment = alignment; ImagingDefaultArena.alignment = alignment;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
@ -4066,9 +4066,9 @@ _set_block_size(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingDefaultArena.block_size = block_size; ImagingDefaultArena.block_size = block_size;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
@ -4092,9 +4092,9 @@ _set_blocks_max(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max); int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
if (!status) { if (!status) {
return ImagingError_MemoryError(); return ImagingError_MemoryError();
} }
@ -4111,9 +4111,9 @@ _clear_cache(PyObject *self, PyObject *args) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingMemoryClearCache(&ImagingDefaultArena, i); ImagingMemoryClearCache(&ImagingDefaultArena, i);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;

View File

@ -149,6 +149,32 @@ struct ImagingPaletteInstance {
int keep_cache; /* This palette will be reused; keep cache */ int keep_cache; /* This palette will be reused; keep cache */
}; };
/* Arena locking and storage-class selection.
 *
 * IMAGING_ARENA_LOCK / IMAGING_ARENA_UNLOCK start out as no-ops and are only
 * given a body in the Py_GIL_DISABLED fallback branch below.
 * IMAGING_ARENA_TLS is the specifier placed on the arena declaration; it is
 * either a thread-local keyword or empty. */
#define IMAGING_ARENA_LOCK(m)
#define IMAGING_ARENA_UNLOCK(m)
/* The first matching branch wins. The HAVE_* macros are expected to be
 * supplied by the build when the compiler accepts the matching keyword. */
#if defined(__cplusplus)
#define IMAGING_ARENA_TLS thread_local
#elif defined(HAVE_THREAD_LOCAL)
#define IMAGING_ARENA_TLS thread_local
#elif defined(HAVE__THREAD_LOCAL)
#define IMAGING_ARENA_TLS _Thread_local
#elif defined(HAVE___THREAD)
#define IMAGING_ARENA_TLS __thread
#elif defined(HAVE___DECLSPEC_THREAD_)
#define IMAGING_ARENA_TLS __declspec(thread)
#elif defined(Py_GIL_DISABLED)
/* No thread-local keyword macro is defined but Py_GIL_DISABLED is:
 * IMAGING_ARENA_TLS expands to nothing, IMAGING_ARENA_LOCKING is defined,
 * and the lock macros are redefined to call PyMutex_Lock / PyMutex_Unlock. */
#define IMAGING_ARENA_TLS
#define IMAGING_ARENA_LOCKING
#undef IMAGING_ARENA_LOCK
#undef IMAGING_ARENA_UNLOCK
#define IMAGING_ARENA_LOCK(m) PyMutex_Lock(m)
#define IMAGING_ARENA_UNLOCK(m) PyMutex_Unlock(m)
#else
/* Neither a thread-local keyword macro nor Py_GIL_DISABLED is defined:
 * IMAGING_ARENA_TLS expands to nothing and the lock macros stay no-ops. */
#define IMAGING_ARENA_TLS
#endif
typedef struct ImagingMemoryArena { typedef struct ImagingMemoryArena {
int alignment; /* Alignment in memory of each line of an image */ int alignment; /* Alignment in memory of each line of an image */
int block_size; /* Preferred block size, bytes */ int block_size; /* Preferred block size, bytes */
@ -161,7 +187,8 @@ typedef struct ImagingMemoryArena {
int stats_reallocated_blocks; /* Number of blocks which were actually reallocated int stats_reallocated_blocks; /* Number of blocks which were actually reallocated
after retrieving */ after retrieving */
int stats_freed_blocks; /* Number of freed blocks */ int stats_freed_blocks; /* Number of freed blocks */
#ifdef Py_GIL_DISABLED
#ifdef IMAGING_ARENA_LOCKING
PyMutex mutex; PyMutex mutex;
#endif #endif
} *ImagingMemoryArena; } *ImagingMemoryArena;
@ -169,7 +196,7 @@ typedef struct ImagingMemoryArena {
/* Objects */ /* Objects */
/* ------- */ /* ------- */
extern struct ImagingMemoryArena ImagingDefaultArena; extern IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena;
extern int extern int
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max); ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max);
extern void extern void

View File

@ -218,9 +218,9 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
break; break;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
ImagingDefaultArena.stats_new_count += 1; ImagingDefaultArena.stats_new_count += 1;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
return im; return im;
} }
@ -259,7 +259,7 @@ ImagingDelete(Imaging im) {
#define IMAGING_PAGE_SIZE (4096) #define IMAGING_PAGE_SIZE (4096)
struct ImagingMemoryArena ImagingDefaultArena = { IMAGING_ARENA_TLS struct ImagingMemoryArena ImagingDefaultArena = {
1, // alignment 1, // alignment
16 * 1024 * 1024, // block_size 16 * 1024 * 1024, // block_size
0, // blocks_max 0, // blocks_max
@ -270,7 +270,7 @@ struct ImagingMemoryArena ImagingDefaultArena = {
0, 0,
0, 0,
0, // Stats 0, // Stats
#ifdef Py_GIL_DISABLED #ifdef IMAGING_ARENA_LOCKING
{0}, {0},
#endif #endif
}; };
@ -369,12 +369,12 @@ ImagingDestroyArray(Imaging im) {
int y = 0; int y = 0;
if (im->blocks) { if (im->blocks) {
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
while (im->blocks[y].ptr) { while (im->blocks[y].ptr) {
memory_return_block(&ImagingDefaultArena, im->blocks[y]); memory_return_block(&ImagingDefaultArena, im->blocks[y]);
y += 1; y += 1;
} }
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
free(im->blocks); free(im->blocks);
} }
} }
@ -504,11 +504,11 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
return NULL; return NULL;
} }
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
Imaging tmp = ImagingAllocateArray( Imaging tmp = ImagingAllocateArray(
im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size
); );
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
if (tmp) { if (tmp) {
return im; return im;
} }
@ -516,9 +516,9 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
ImagingError_Clear(); ImagingError_Clear();
// Try to allocate the image once more with smallest possible block size // Try to allocate the image once more with smallest possible block size
MUTEX_LOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_LOCK(&ImagingDefaultArena.mutex);
tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE); tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE);
MUTEX_UNLOCK(&ImagingDefaultArena.mutex); IMAGING_ARENA_UNLOCK(&ImagingDefaultArena.mutex);
if (tmp) { if (tmp) {
return im; return im;
} }