Image -> Arrow support (#8330)

Co-authored-by: Andrew Murray <3112309+radarhere@users.noreply.github.com>
Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
This commit is contained in:
wiredfool 2025-04-01 07:10:45 +01:00 committed by GitHub
parent 7d50816f0a
commit 5c76e7ec17
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 1165 additions and 1 deletions

View File

@ -36,6 +36,9 @@ python3 -m pip install -U pytest
python3 -m pip install -U pytest-cov
python3 -m pip install -U pytest-timeout
python3 -m pip install pyroma
# optional test dependency, only install if there's a binary package.
# fails on beta 3.14 and PyPy
python3 -m pip install --only-binary=:all: pyarrow || true
if [[ $(uname) != CYGWIN* ]]; then
python3 -m pip install numpy

View File

@ -30,6 +30,9 @@ python3 -m pip install -U pytest-cov
python3 -m pip install -U pytest-timeout
python3 -m pip install pyroma
python3 -m pip install numpy
# optional test dependency, only install if there's a binary package.
# fails on beta 3.14 and PyPy
python3 -m pip install --only-binary=:all: pyarrow || true
# libavif
pushd depends && ./install_libavif.sh && popd

View File

@ -88,6 +88,10 @@ jobs:
run: |
python3 -m pip install PyQt6
- name: Install PyArrow dependency
run: |
python3 -m pip install --only-binary=:all: pyarrow || true
- name: Install dependencies
id: install
run: |

164
Tests/test_arrow.py Normal file
View File

@ -0,0 +1,164 @@
from __future__ import annotations
import pytest
from PIL import Image
from .helper import hopper
@pytest.mark.parametrize(
    "mode, dest_modes",
    (
        ("L", ["I", "F", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]),
        ("I", ["L", "F"]),  # Technically I;32 can work for any 4x8bit storage.
        ("F", ["I", "L", "LA", "RGB", "RGBA", "RGBX", "CMYK", "YCbCr", "HSV"]),
        ("LA", ["L", "F"]),
        ("RGB", ["L", "F"]),
        ("RGBA", ["L", "F"]),
        ("RGBX", ["L", "F"]),
        ("CMYK", ["L", "F"]),
        ("YCbCr", ["L", "F"]),
        ("HSV", ["L", "F"]),
    ),
)
def test_invalid_array_type(mode: str, dest_modes: list[str]) -> None:
    """Importing an image's Arrow data under a listed destination mode fails."""
    source = hopper(mode)
    for target_mode in dest_modes:
        with pytest.raises(ValueError):
            Image.fromarrow(source, target_mode, source.size)
def test_invalid_array_size() -> None:
    """A requested size that differs from the source image's size is rejected."""
    wrong_size = (10, 10)
    source = hopper("RGB")
    assert source.size != wrong_size

    with pytest.raises(ValueError):
        Image.fromarrow(source, "RGB", wrong_size)
def test_release_schema() -> None:
    """Create a schema capsule and drop it again while the image is alive."""
    # Nothing is asserted: this should simply not error out, and valgrind
    # should be clean.
    img = hopper("L")
    schema_capsule = img.__arrow_c_schema__()
    del schema_capsule
def test_release_array() -> None:
    """Create the schema/array capsule pair and drop both again."""
    # these should not error out, valgrind should be clean
    img = hopper("L")
    # Image.__arrow_c_array__ returns the schema capsule first and the array
    # capsule second, so unpack in that order. The first element is still
    # the one that gets deleted first.
    schema, array = img.__arrow_c_array__()
    del schema
    del array
def test_readonly() -> None:
    """An image created by ``fromarrow`` reports itself as read-only."""
    source = hopper("L")
    reloaded = Image.fromarrow(source, source.mode, source.size)
    assert reloaded.readonly == 1

    # Clearing the Python-side flag must not change the reported value.
    reloaded._readonly = 0
    assert reloaded.readonly == 1
def test_multiblock_l_image() -> None:
    """Array export of a two-block single channel image raises ValueError."""
    width = 4096
    block_size = Image.core.get_block_size()
    # check a 2 block image in single channel mode
    img = Image.new("L", (width, 2 * block_size // width), 128)

    with pytest.raises(ValueError):
        img.__arrow_c_array__()
def test_multiblock_rgba_image() -> None:
    """Array export of a two-block four channel image raises ValueError."""
    width = 4096
    block_size = Image.core.get_block_size()
    # check a 2 block image in 4 channel mode
    img = Image.new("RGBA", (width, (block_size // width) // 2), (128, 127, 126, 125))

    with pytest.raises(ValueError):
        img.__arrow_c_array__()
def test_multiblock_l_schema() -> None:
    """Schema export of a two-block single channel image raises ValueError."""
    width = 4096
    block_size = Image.core.get_block_size()
    # check a 2 block image in single channel mode
    img = Image.new("L", (width, 2 * block_size // width), 128)

    with pytest.raises(ValueError):
        img.__arrow_c_schema__()
def test_multiblock_rgba_schema() -> None:
    """Schema export of a two-block four channel image raises ValueError."""
    width = 4096
    block_size = Image.core.get_block_size()
    # check a 2 block image in 4 channel mode
    img = Image.new("RGBA", (width, (block_size // width) // 2), (128, 127, 126, 125))

    with pytest.raises(ValueError):
        img.__arrow_c_schema__()
def test_singleblock_l_image() -> None:
    """With the single block allocator on, a large "L" image exports an array."""
    # The allocator switch is a global setting, so restore it even when an
    # assertion below fails; otherwise later tests would inherit it.
    previous = Image.core.get_use_block_allocator()
    Image.core.set_use_block_allocator(1)
    try:
        block_size = Image.core.get_block_size()
        # an image of 2 block sizes in single channel mode
        size = (4096, 2 * (block_size // 4096))
        img = Image.new("L", size, 128)
        assert img.im.isblock()

        (schema, arr) = img.__arrow_c_array__()
        assert schema
        assert arr
    finally:
        Image.core.set_use_block_allocator(previous)
def test_singleblock_rgba_image() -> None:
    """With the single block allocator on, a large "RGBA" image exports an array."""
    # The allocator switch is a global setting, so restore it even when an
    # assertion below fails; otherwise later tests would inherit it.
    previous = Image.core.get_use_block_allocator()
    Image.core.set_use_block_allocator(1)
    try:
        block_size = Image.core.get_block_size()
        # an image of 2 block sizes in 4 channel mode
        size = (4096, (block_size // 4096) // 2)
        img = Image.new("RGBA", size, (128, 127, 126, 125))
        assert img.im.isblock()

        (schema, arr) = img.__arrow_c_array__()
        assert schema
        assert arr
    finally:
        Image.core.set_use_block_allocator(previous)
def test_singleblock_l_schema() -> None:
    """With the single block allocator on, a large "L" image exports a schema."""
    # The allocator switch is a global setting, so restore it even when an
    # assertion below fails; otherwise later tests would inherit it.
    previous = Image.core.get_use_block_allocator()
    Image.core.set_use_block_allocator(1)
    try:
        block_size = Image.core.get_block_size()
        # an image of 2 block sizes in single channel mode
        size = (4096, 2 * block_size // 4096)
        img = Image.new("L", size, 128)
        assert img.im.isblock()

        schema = img.__arrow_c_schema__()
        assert schema
    finally:
        Image.core.set_use_block_allocator(previous)
def test_singleblock_rgba_schema() -> None:
    """With the single block allocator on, a large "RGBA" image exports a schema."""
    # The allocator switch is a global setting, so restore it even when an
    # assertion below fails; otherwise later tests would inherit it.
    previous = Image.core.get_use_block_allocator()
    Image.core.set_use_block_allocator(1)
    try:
        block_size = Image.core.get_block_size()
        # an image of 2 block sizes in 4 channel mode
        size = (4096, (block_size // 4096) // 2)
        img = Image.new("RGBA", size, (128, 127, 126, 125))
        assert img.im.isblock()

        schema = img.__arrow_c_schema__()
        assert schema
    finally:
        Image.core.set_use_block_allocator(previous)

112
Tests/test_pyarrow.py Normal file
View File

@ -0,0 +1,112 @@
from __future__ import annotations
from typing import Any # undone
import pytest
from PIL import Image
from .helper import (
assert_deep_equal,
assert_image_equal,
hopper,
)
pyarrow = pytest.importorskip("pyarrow", reason="PyArrow not installed")
TEST_IMAGE_SIZE = (10, 10)
def _test_img_equals_pyarray(
    img: Image.Image, arr: Any, mask: list[int] | None
) -> None:
    """Spot-check that ``arr`` holds the same pixel values as ``img``.

    Pixels are sampled on a grid of roughly ten steps per axis rather than
    compared exhaustively.

    :param img: Image whose pixels are the expected values.
    :param arr: Flat, row-major sequence with one element per pixel; every
        element must provide ``as_py()``.
    :param mask: For each band of the image pixel, the index of the matching
        entry in the array element. When ``None`` (or empty), pixel and
        element are compared as a whole.
    """
    assert img.height * img.width == len(arr)
    px = img.load()
    assert px is not None

    # A step of 0 makes range() raise, so clamp it for images that are
    # narrower or shorter than 10 pixels.
    x_step = max(1, img.size[0] // 10)
    y_step = max(1, img.size[1] // 10)

    for x in range(0, img.size[0], x_step):
        for y in range(0, img.size[1], y_step):
            pixel = px[x, y]
            element = arr[y * img.width + x].as_py()
            if mask:
                assert isinstance(pixel, tuple)
                for ix, elt in enumerate(mask):
                    assert pixel[ix] == element[elt]
            else:
                assert_deep_equal(pixel, element)
# Expected Arrow type for the multiband modes in test_to_array: a fixed-size
# list of four uint8 values whose item field is marked non-nullable.
# It is built through a throwaway field because it is really hard to get a
# non-nullable list type any other way.
fl_uint8_4_type = pyarrow.field(
    "_", pyarrow.list_(pyarrow.field("_", pyarrow.uint8()).with_nullable(False), 4)
).type
@pytest.mark.parametrize(
    "mode, dtype, mask",
    (
        ("L", pyarrow.uint8(), None),
        ("I", pyarrow.int32(), None),
        ("F", pyarrow.float32(), None),
        ("LA", fl_uint8_4_type, [0, 3]),
        ("RGB", fl_uint8_4_type, [0, 1, 2]),
        ("RGBA", fl_uint8_4_type, None),
        ("RGBX", fl_uint8_4_type, None),
        ("CMYK", fl_uint8_4_type, None),
        ("YCbCr", fl_uint8_4_type, [0, 1, 2]),
        ("HSV", fl_uint8_4_type, [0, 1, 2]),
    ),
)
def test_to_array(mode: str, dtype: Any, mask: list[int] | None) -> None:
    """Round-trip an image through a pyarrow array and back."""
    # Crop to a non-square size
    img = hopper(mode).crop((3, 0, 124, 127))
    assert img.size == (121, 127)

    exported = pyarrow.array(img)
    _test_img_equals_pyarray(img, exported, mask)
    assert exported.type == dtype

    reloaded = Image.fromarrow(exported, mode, img.size)
    assert reloaded
    assert_image_equal(img, reloaded)
def test_lifetime() -> None:
    """Arrays stay usable after the image they were exported from is deleted."""
    # valgrind shouldn't error out here.
    img = hopper("L")
    first = pyarrow.array(img)
    second = pyarrow.array(img)
    del img

    assert first.sum().as_py() > 0
    del first

    assert second.sum().as_py() > 0
    del second
def test_lifetime2() -> None:
    """The image stays usable after the arrays exported from it are deleted."""
    # valgrind shouldn't error out here.
    img = hopper("L")
    first = pyarrow.array(img)
    second = pyarrow.array(img)

    assert first.sum().as_py() > 0
    del first

    assert second.sum().as_py() > 0
    del second

    # The image must still be readable once both arrays are gone.
    duplicate = img.copy()
    px = duplicate.load()
    assert px  # make mypy happy
    assert isinstance(px[0, 0], int)

View File

@ -79,6 +79,7 @@ Constructing images
.. autofunction:: new
.. autofunction:: fromarray
.. autofunction:: fromarrow
.. autofunction:: frombytes
.. autofunction:: frombuffer
@ -370,6 +371,8 @@ Protocols
.. autoclass:: SupportsArrayInterface
:show-inheritance:
.. autoclass:: SupportsArrowArrayInterface
:show-inheritance:
.. autoclass:: SupportsGetData
:show-inheritance:

View File

@ -0,0 +1,88 @@
.. _arrow-support:
=============
Arrow Support
=============
`Arrow <https://arrow.apache.org/>`__
is an in-memory data exchange format that is the spiritual
successor to the NumPy array interface. It provides for zero-copy
access to columnar data, which in our case is ``Image`` data.
The goal with Arrow is to provide native zero-copy interoperability
with any Arrow provider or consumer in the Python ecosystem.
.. warning:: Zero-copy does not mean zero allocation -- the internal
memory layout of Pillow images contains an allocation for row
pointers, so reading an Arrow image has a non-zero memory cost,
although it is significantly smaller than that of a full copy.
Data Formats
============
Pillow currently supports exporting Arrow images in all modes
**except** for ``BGR;15``, ``BGR;16`` and ``BGR;24``. This is due to
line-length packing in these modes making for non-contiguous memory.
For single-band images, the exported array is width*height elements,
with each pixel corresponding to the appropriate Arrow type.
For multiband images, the exported array is width*height fixed-length
four-element arrays of uint8. This is memory compatible with the raw
image storage of four bytes per pixel.
Mode ``1`` images are exported as one uint8 byte/pixel, as this is
consistent with the internal storage.
Pillow will accept, but not produce, one other format. For any
multichannel image with 32-bit storage per pixel, Pillow will accept
an array of width*height int32 elements, which will then be
interpreted using the mode-specific interpretation of the bytes.
The image mode must match the Arrow band format when reading single
channel images.
Memory Allocator
================
Pillow's default memory allocator, the :ref:`block_allocator`,
allocates up to a 16 MB block for images by default. Larger images
overflow into additional blocks. Arrow requires a single contiguous
memory allocation, so images allocated in multiple blocks cannot be
exported in the Arrow format.
To enable the single block allocator::
from PIL import Image
Image.core.set_use_block_allocator(1)
Note that this is a global setting, not a per-image setting.
Unsupported Features
====================
* Table/dataframe protocol. We support a single array.
* Null markers, producing or consuming. Null values are inferred from
the mode, e.g. RGB images are stored in the first three bytes of
each 32-bit pixel, and the last byte is an implied null.
* Schema negotiation. There is an optional schema for the requested
datatype in the Arrow source interface. We ignore that
parameter.
* Array metadata.
Internal Details
================
Python Arrow C interface:
https://arrow.apache.org/docs/format/CDataInterface/PyCapsuleInterface.html
The memory that is exported from the Arrow interface is shared -- not
copied -- so the lifetime of the memory allocation is no longer strictly
tied to the life of the Python object.
The core imaging struct now has a refcount associated with it, and the
lifetime of the core image struct is now divorced from the Python
image object. Creating an arrow reference to the image increments the
refcount, and the imaging struct is only released when the refcount
reaches zero.

View File

@ -1,3 +1,6 @@
.. _block_allocator:
Block Allocator
===============

View File

@ -9,3 +9,4 @@ Internal Reference
block_allocator
internal_modules
c_extension_debugging
arrow_support

View File

@ -54,6 +54,10 @@ optional-dependencies.fpx = [
optional-dependencies.mic = [
"olefile",
]
optional-dependencies.test-arrow = [
"pyarrow",
]
optional-dependencies.tests = [
"check-manifest",
"coverage>=7.4.2",
@ -67,6 +71,7 @@ optional-dependencies.tests = [
"pytest-timeout",
"trove-classifiers>=2024.10.12",
]
optional-dependencies.typing = [
"typing-extensions; python_version<'3.10'",
]

View File

@ -65,6 +65,7 @@ _IMAGING = ("decode", "encode", "map", "display", "outline", "path")
_LIB_IMAGING = (
"Access",
"AlphaComposite",
"Arrow",
"Resample",
"Reduce",
"Bands",

View File

@ -577,6 +577,14 @@ class Image:
def mode(self) -> str:
return self._mode
@property
def readonly(self) -> int:
    """Read-only flag of the image.

    The flag reported by the core image, when there is one and it is set,
    takes precedence over the Python-side ``_readonly`` value.
    """
    core_flag = self._im and self._im.readonly
    return core_flag or self._readonly

@readonly.setter
def readonly(self, readonly: int) -> None:
    # Only the Python-side flag is stored; the core image is not touched.
    self._readonly = readonly
def _new(self, im: core.ImagingCore) -> Image:
new = Image()
new.im = im
@ -728,6 +736,16 @@ class Image:
new["shape"], new["typestr"] = _conv_type_shape(self)
return new
def __arrow_c_schema__(self) -> object:
    """Load the image and return the schema object produced by the core image."""
    self.load()
    core_image = self.im
    return core_image.__arrow_c_schema__()
def __arrow_c_array__(
    self, requested_schema: object | None = None
) -> tuple[object, object]:
    """Load the image and return its ``(schema, array)`` pair.

    :param requested_schema: Accepted for the protocol, but not used.
    """
    self.load()
    core_image = self.im
    schema_capsule = core_image.__arrow_c_schema__()
    array_capsule = core_image.__arrow_c_array__()
    return (schema_capsule, array_capsule)
def __getstate__(self) -> list[Any]:
im_data = self.tobytes() # load image first
return [self.info, self.mode, self.size, self.getpalette(), im_data]
@ -3201,6 +3219,18 @@ class SupportsArrayInterface(Protocol):
raise NotImplementedError()
class SupportsArrowArrayInterface(Protocol):
    """
    An object that has an ``__arrow_c_array__`` method corresponding to the arrow c
    data interface.
    """

    # ``PyCapsule`` is not a name defined in this module, so the annotations
    # are quoted and the undefined-name checks are silenced on those lines.
    # ``fromarrow`` unpacks the returned pair as (schema capsule, array capsule).
    def __arrow_c_array__(
        self, requested_schema: "PyCapsule" = None  # type: ignore[name-defined]  # noqa: F821, UP037
    ) -> tuple["PyCapsule", "PyCapsule"]:  # type: ignore[name-defined]  # noqa: F821, UP037
        raise NotImplementedError()
def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image:
"""
Creates an image memory from an object exporting the array interface
@ -3289,6 +3319,56 @@ def fromarray(obj: SupportsArrayInterface, mode: str | None = None) -> Image:
return frombuffer(mode, size, obj, "raw", rawmode, 0, 1)
def fromarrow(
    obj: SupportsArrowArrayInterface, mode: str, size: tuple[int, int]
) -> Image:
    """Creates an image with zero-copy shared memory from an object exporting
    the arrow_c_array interface protocol::

      from PIL import Image
      import pyarrow as pa
      arr = pa.array([0]*(5*5*4), type=pa.uint8())
      im = Image.fromarrow(arr, 'RGBA', (5, 5))

    If the data representation of the ``obj`` is not compatible with
    Pillow internal storage, a ValueError is raised.

    Pillow images can also be converted to Arrow objects::

      from PIL import Image
      import pyarrow as pa
      im = Image.open('hopper.jpg')
      arr = pa.array(im)

    As with array support, when converting Pillow images to arrays,
    only pixel values are transferred. This means that P and PA mode
    images will lose their palette.

    :param obj: Object with an arrow_c_array interface
    :param mode: Image mode.
    :param size: Image size. This must match the storage of the arrow object.
    :returns: An Image object
    :raises ValueError: If ``obj`` has no ``__arrow_c_array__`` method, or if
       the core returns no image for the given data.

    Note that according to the Arrow spec, both the producer and the
    consumer should consider the exported array to be immutable, as
    unsynchronized updates will potentially cause inconsistent data.

    See: :ref:`arrow-support` for more detailed information

    .. versionadded:: 11.2.0

    """
    if not hasattr(obj, "__arrow_c_array__"):
        msg = "arrow_c_array interface not found"
        raise ValueError(msg)

    schema_capsule, array_capsule = obj.__arrow_c_array__()
    _im = core.new_arrow(mode, size, schema_capsule, array_capsule)
    if _im:
        return Image()._new(_im)

    msg = "new_arrow returned None without an exception"
    raise ValueError(msg)
def fromqimage(im: ImageQt.QImage) -> ImageFile.ImageFile:
"""Creates an image instance from a QImage image"""
from . import ImageQt

View File

@ -230,6 +230,93 @@ PyImaging_GetBuffer(PyObject *buffer, Py_buffer *view) {
return PyObject_GetBuffer(buffer, view, PyBUF_SIMPLE);
}
/* -------------------------------------------------------------------- */
/* Arrow HANDLING */
/* -------------------------------------------------------------------- */
/* Raise the Python exception matching a libImaging Arrow export error code
 * and return the result of the ImagingError_* helper that raised it. */
PyObject *
ArrowError(int err) {
    switch (err) {
        case IMAGING_CODEC_MEMORY:
            return ImagingError_MemoryError();
        case IMAGING_ARROW_INCOMPATIBLE_MODE:
            return ImagingError_ValueError("Incompatible Pillow mode for Arrow array");
        case IMAGING_ARROW_MEMORY_LAYOUT:
            return ImagingError_ValueError(
                "Image is in multiple array blocks, use imaging_new_block for zero copy"
            );
        default:
            return ImagingError_ValueError("Unknown error");
    }
}
/* Capsule destructor for "arrow_schema" capsules: run the schema's release
 * callback if it is still set, then free the struct itself. */
void
ReleaseArrowSchemaPyCapsule(PyObject *capsule) {
    struct ArrowSchema *exported =
        (struct ArrowSchema *)PyCapsule_GetPointer(capsule, "arrow_schema");

    if (exported->release) {
        exported->release(exported);
    }
    free(exported);
}
/* Export the schema of self->image as a new "arrow_schema" capsule.
 *
 * Returns the capsule, or the result of ArrowError() when the schema struct
 * cannot be allocated or the image cannot be exported. */
PyObject *
ExportArrowSchemaPyCapsule(ImagingObject *self) {
    PyObject *capsule;
    int err;
    struct ArrowSchema *schema =
        (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema));

    if (!schema) {
        /* export_imaging_schema writes through the pointer, so bail out now */
        return ArrowError(IMAGING_CODEC_MEMORY);
    }

    err = export_imaging_schema(self->image, schema);
    if (err != 0) {
        free(schema);
        return ArrowError(err);
    }

    capsule = PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule);
    if (!capsule) {
        /* The capsule destructor will never run, so release the exported
         * schema here instead of leaking it. */
        if (schema->release != NULL) {
            schema->release(schema);
        }
        free(schema);
    }
    return capsule;
}
/* Capsule destructor for "arrow_array" capsules: run the array's release
 * callback if it is still set, then free the struct itself. */
void
ReleaseArrowArrayPyCapsule(PyObject *capsule) {
    struct ArrowArray *exported =
        (struct ArrowArray *)PyCapsule_GetPointer(capsule, "arrow_array");

    if (exported->release) {
        exported->release(exported);
    }
    free(exported);
}
/* Export the pixel data of self->image as a new "arrow_array" capsule.
 *
 * Returns the capsule, or the result of ArrowError() when the array struct
 * cannot be allocated or the image cannot be exported. */
PyObject *
ExportArrowArrayPyCapsule(ImagingObject *self) {
    PyObject *capsule;
    int err;
    struct ArrowArray *array =
        (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray));

    if (!array) {
        /* export_imaging_array writes through the pointer, so bail out now */
        return ArrowError(IMAGING_CODEC_MEMORY);
    }

    err = export_imaging_array(self->image, array);
    if (err != 0) {
        free(array);
        return ArrowError(err);
    }

    capsule = PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule);
    if (!capsule) {
        /* The capsule destructor will never run, so release the exported
         * array here instead of leaking it. */
        if (array->release != NULL) {
            array->release(array);
        }
        free(array);
    }
    return capsule;
}
/* core.new_arrow(mode, (xsize, ysize), schema_capsule, array_capsule)
 *
 * Wrap the image built by ImagingNewArrow in a Python imaging object.
 * Returns NULL when the arguments cannot be parsed; when no object is
 * produced, the result of ImagingError_ValueError is returned. */
static PyObject *
_new_arrow(PyObject *self, PyObject *args) {
    char *mode;
    int xsize, ysize;
    PyObject *schema_capsule, *array_capsule;
    Imaging im;
    PyObject *wrapped;

    if (!PyArg_ParseTuple(
            args, "s(ii)OO", &mode, &xsize, &ysize, &schema_capsule, &array_capsule
        )) {
        return NULL;
    }

    // ImagingBorrowArrow is responsible for retaining the array_capsule
    im = ImagingNewArrow(mode, xsize, ysize, schema_capsule, array_capsule);
    wrapped = PyImagingNew(im);
    if (wrapped) {
        return wrapped;
    }
    return ImagingError_ValueError("Invalid Arrow array mode or size mismatch");
}
/* -------------------------------------------------------------------- */
/* EXCEPTION REROUTING */
/* -------------------------------------------------------------------- */
@ -3655,6 +3742,10 @@ static struct PyMethodDef methods[] = {
/* Misc. */
{"save_ppm", (PyCFunction)_save_ppm, METH_VARARGS},
/* arrow */
{"__arrow_c_schema__", (PyCFunction)ExportArrowSchemaPyCapsule, METH_VARARGS},
{"__arrow_c_array__", (PyCFunction)ExportArrowArrayPyCapsule, METH_VARARGS},
{NULL, NULL} /* sentinel */
};
@ -3722,6 +3813,11 @@ _getattr_unsafe_ptrs(ImagingObject *self, void *closure) {
);
}
/* Getter for the "readonly" attribute: the image's read_only flag as an int. */
static PyObject *
_getattr_readonly(ImagingObject *self, void *closure) {
    long flag = self->image->read_only;
    return PyLong_FromLong(flag);
}
static struct PyGetSetDef getsetters[] = {
{"mode", (getter)_getattr_mode},
{"size", (getter)_getattr_size},
@ -3729,6 +3825,7 @@ static struct PyGetSetDef getsetters[] = {
{"id", (getter)_getattr_id},
{"ptr", (getter)_getattr_ptr},
{"unsafe_ptrs", (getter)_getattr_unsafe_ptrs},
{"readonly", (getter)_getattr_readonly},
{NULL}
};
@ -3983,6 +4080,21 @@ _set_blocks_max(PyObject *self, PyObject *args) {
Py_RETURN_NONE;
}
/* core.set_use_block_allocator(flag): pass the integer flag on to
 * ImagingMemorySetBlockAllocator for the default arena. Returns None. */
static PyObject *
_set_use_block_allocator(PyObject *self, PyObject *args) {
    int enabled;

    if (PyArg_ParseTuple(args, "i:set_use_block_allocator", &enabled)) {
        ImagingMemorySetBlockAllocator(&ImagingDefaultArena, enabled);
        Py_RETURN_NONE;
    }
    return NULL;
}
/* core.get_use_block_allocator(): the default arena's use_block_allocator
 * value as a Python int. The arguments are not inspected. */
static PyObject *
_get_use_block_allocator(PyObject *self, PyObject *args) {
    long enabled = ImagingDefaultArena.use_block_allocator;
    return PyLong_FromLong(enabled);
}
static PyObject *
_clear_cache(PyObject *self, PyObject *args) {
int i = 0;
@ -4104,6 +4216,7 @@ static PyMethodDef functions[] = {
{"fill", (PyCFunction)_fill, METH_VARARGS},
{"new", (PyCFunction)_new, METH_VARARGS},
{"new_block", (PyCFunction)_new_block, METH_VARARGS},
{"new_arrow", (PyCFunction)_new_arrow, METH_VARARGS},
{"merge", (PyCFunction)_merge, METH_VARARGS},
/* Functions */
@ -4190,9 +4303,11 @@ static PyMethodDef functions[] = {
{"get_alignment", (PyCFunction)_get_alignment, METH_VARARGS},
{"get_block_size", (PyCFunction)_get_block_size, METH_VARARGS},
{"get_blocks_max", (PyCFunction)_get_blocks_max, METH_VARARGS},
{"get_use_block_allocator", (PyCFunction)_get_use_block_allocator, METH_VARARGS},
{"set_alignment", (PyCFunction)_set_alignment, METH_VARARGS},
{"set_block_size", (PyCFunction)_set_block_size, METH_VARARGS},
{"set_blocks_max", (PyCFunction)_set_blocks_max, METH_VARARGS},
{"set_use_block_allocator", (PyCFunction)_set_use_block_allocator, METH_VARARGS},
{"clear_cache", (PyCFunction)_clear_cache, METH_VARARGS},
{NULL, NULL} /* sentinel */

299
src/libImaging/Arrow.c Normal file
View File

@ -0,0 +1,299 @@
#include "Arrow.h"
#include "Imaging.h"
#include <string.h>
/* struct ArrowSchema* */
/* _arrow_schema_channel(char* channel, char* format) { */
/* } */
/*
 * Release callback installed by export_named_type().
 *
 * Frees the strings owned by the schema, releases and frees every child
 * struct together with the children pointer array, releases the dictionary
 * and finally marks the schema as released. The schema struct itself is not
 * freed here; that is left to whoever allocated it.
 */
static void
ReleaseExportedSchema(struct ArrowSchema *array) {
    // Releasing an already released schema is a no-op.
    if (!array->release) {
        return;
    }
    if (array->format) {
        free((void *)array->format);
        array->format = NULL;
    }
    if (array->name) {
        free((void *)array->name);
        array->name = NULL;
    }
    if (array->metadata) {
        free((void *)array->metadata);
        array->metadata = NULL;
    }

    // Release children. The child structs and the pointer array were
    // allocated by export_imaging_schema(), so they are freed here as well;
    // a child whose release callback is already NULL still has its struct
    // freed.
    if (array->children) {
        for (int64_t i = 0; i < array->n_children; ++i) {
            struct ArrowSchema *child = array->children[i];
            if (child == NULL) {
                continue;
            }
            if (child->release != NULL) {
                child->release(child);
                child->release = NULL;
            }
            free(child);
        }
        free(array->children);
        array->children = NULL;
    }
    array->n_children = 0;

    // Release dictionary
    struct ArrowSchema *dict = array->dictionary;
    if (dict != NULL && dict->release != NULL) {
        dict->release(dict);
        dict->release = NULL;
    }

    // Mark array released
    array->release = NULL;
}

/*
 * Fill *schema with a childless type that owns heap copies of the given
 * format and name strings.
 *
 * Returns 0 on success or IMAGING_CODEC_MEMORY if a copy cannot be
 * allocated, in which case *schema is left untouched.
 */
int
export_named_type(struct ArrowSchema *schema, char *format, char *name) {
    char *formatp;
    char *namep;
    // lengths include the terminating NUL
    size_t format_len = strlen(format) + 1;
    size_t name_len = strlen(name) + 1;

    formatp = calloc(format_len, 1);
    if (!formatp) {
        return IMAGING_CODEC_MEMORY;
    }

    namep = calloc(name_len, 1);
    if (!namep) {
        free(formatp);
        return IMAGING_CODEC_MEMORY;
    }

    memcpy(formatp, format, format_len);
    memcpy(namep, name, name_len);

    *schema = (struct ArrowSchema){// Type description
                                   .format = formatp,
                                   .name = namep,
                                   .metadata = NULL,
                                   .flags = 0,
                                   .n_children = 0,
                                   .children = NULL,
                                   .dictionary = NULL,
                                   // Bookkeeping
                                   .release = &ReleaseExportedSchema
    };
    return 0;
}

/*
 * Describe the Arrow type of im in *schema.
 *
 * Single band images export the band's own format under the band's name.
 * Every other image exports a "+w:4" fixed-size list with a single child
 * named "pixel" that uses the band format.
 *
 * Returns 0 on success, IMAGING_ARROW_INCOMPATIBLE_MODE when the image has
 * no Arrow band format, IMAGING_ARROW_MEMORY_LAYOUT when it uses more than
 * one block, or IMAGING_CODEC_MEMORY on allocation failure. On failure
 * everything allocated here has been released again.
 */
int
export_imaging_schema(Imaging im, struct ArrowSchema *schema) {
    int retval = 0;

    if (strcmp(im->arrow_band_format, "") == 0) {
        return IMAGING_ARROW_INCOMPATIBLE_MODE;
    }

    /* for now, single block images */
    if (!(im->blocks_count == 0 || im->blocks_count == 1)) {
        return IMAGING_ARROW_MEMORY_LAYOUT;
    }

    if (im->bands == 1) {
        return export_named_type(schema, im->arrow_band_format, im->band_names[0]);
    }

    retval = export_named_type(schema, "+w:4", "");
    if (retval != 0) {
        return retval;
    }

    // if it's not 1 band, it's an int32 at the moment. 4 uint8 bands.
    schema->children = calloc(1, sizeof(struct ArrowSchema *));
    if (!schema->children) {
        schema->release(schema);
        return IMAGING_CODEC_MEMORY;
    }
    schema->children[0] = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema));
    if (!schema->children[0]) {
        // n_children is still 0, so release only frees the pointer array
        schema->release(schema);
        return IMAGING_CODEC_MEMORY;
    }
    schema->n_children = 1;

    retval = export_named_type(schema->children[0], im->arrow_band_format, "pixel");
    if (retval != 0) {
        // The zeroed child has no release callback; the parent's release
        // frees the child struct and the pointer array exactly once.
        schema->release(schema);
        return retval;
    }
    return 0;
}
/*
 * Release callback used by every ArrowArray exported from this file.
 *
 * A childless array passes its image (stored in private_data) to
 * ImagingDelete. The buffers pointer array is freed, but not the pixel
 * memory it points at. Children that still have a release callback are
 * released and their structs freed, and the array is marked released.
 *
 * NOTE(review): export_fixed_pixel_array increments im->refcount for the
 * parent array as well as for its child, but a parent (n_children == 1)
 * never reaches ImagingDelete here -- confirm that the counts balance.
 */
static void
release_const_array(struct ArrowArray *array) {
    Imaging im = (Imaging)array->private_data;

    // Only arrays without children hand the image to ImagingDelete.
    if (array->n_children == 0) {
        ImagingDelete(im);
    }

    // Free the buffers and the buffers array
    if (array->buffers) {
        free(array->buffers);
        array->buffers = NULL;
    }
    if (array->children) {
        // undone -- does arrow release all the children recursively?
        for (int i = 0; i < array->n_children; i++) {
            // A child whose release is already NULL is skipped entirely,
            // including the free of its struct.
            if (array->children[i]->release) {
                array->children[i]->release(array->children[i]);
                array->children[i]->release = NULL;
                free(array->children[i]);
            }
        }
        free(array->children);
        array->children = NULL;
    }
    // Mark released
    array->release = NULL;
}
/*
 * Export a single band image as a primitive Arrow array that points at the
 * image's pixel memory without copying it.
 *
 * Returns 0 on success, IMAGING_ARROW_MEMORY_LAYOUT when the image uses more
 * than one block, or IMAGING_CODEC_MEMORY when the buffer list cannot be
 * allocated. On failure *array and the image refcount are left untouched.
 */
int
export_single_channel_array(Imaging im, struct ArrowArray *array) {
    int length = im->xsize * im->ysize;
    const void **buffers;

    /* for now, single block images */
    if (!(im->blocks_count == 0 || im->blocks_count == 1)) {
        return IMAGING_ARROW_MEMORY_LAYOUT;
    }

    if (im->lines_per_block && im->lines_per_block < im->ysize) {
        length = im->xsize * im->lines_per_block;
    }

    // Allocate the list of buffers before taking a reference on the image,
    // so that an allocation failure has nothing to undo.
    buffers = (const void **)malloc(sizeof(void *) * 2);
    if (!buffers) {
        return IMAGING_CODEC_MEMORY;
    }
    buffers[0] = NULL;  // no nulls, null bitmap can be omitted
    if (im->block) {
        buffers[1] = im->block;
    } else {
        buffers[1] = im->blocks[0].ptr;
    }

    MUTEX_LOCK(&im->mutex);
    im->refcount++;
    MUTEX_UNLOCK(&im->mutex);

    // Initialize primitive fields
    *array = (struct ArrowArray){// Data description
                                 .length = length,
                                 .offset = 0,
                                 .null_count = 0,
                                 .n_buffers = 2,
                                 .n_children = 0,
                                 .buffers = buffers,
                                 .children = NULL,
                                 .dictionary = NULL,
                                 // Bookkeeping
                                 .release = &release_const_array,
                                 .private_data = im
    };
    return 0;
}
/*
 * Export a multiband image as a fixed-size-list Arrow array: the parent
 * array has one entry per pixel, and a single child array of four times that
 * length points at the image's pixel memory without copying it.
 *
 * Returns 0 on success, IMAGING_ARROW_MEMORY_LAYOUT when the image uses more
 * than one block, or IMAGING_CODEC_MEMORY when an allocation fails. On
 * failure *array and the image refcount are left untouched.
 */
int
export_fixed_pixel_array(Imaging im, struct ArrowArray *array) {
    int length = im->xsize * im->ysize;
    const void **buffers;
    const void **child_buffers;
    struct ArrowArray **children;
    struct ArrowArray *child;

    /* for now, single block images */
    if (!(im->blocks_count == 0 || im->blocks_count == 1)) {
        return IMAGING_ARROW_MEMORY_LAYOUT;
    }

    if (im->lines_per_block && im->lines_per_block < im->ysize) {
        length = im->xsize * im->lines_per_block;
    }

    // Allocate everything up front, before touching *array or the image
    // refcount, so that a failure only has these four pointers to free.
    buffers = (const void **)calloc(1, sizeof(void *));
    children = (struct ArrowArray **)calloc(1, sizeof(struct ArrowArray *));
    child = (struct ArrowArray *)calloc(1, sizeof(struct ArrowArray));
    child_buffers = (const void **)calloc(2, sizeof(void *));
    if (!buffers || !children || !child || !child_buffers) {
        free(buffers);
        free(children);
        free(child);
        free(child_buffers);
        return IMAGING_CODEC_MEMORY;
    }

    // buffers[0] and child_buffers[0] stay NULL:
    // no nulls, null bitmap can be omitted
    if (im->block) {
        child_buffers[1] = im->block;
    } else {
        child_buffers[1] = im->blocks[0].ptr;
    }

    // One reference for the parent array and one for the child array.
    // NOTE(review): release_const_array only calls ImagingDelete for arrays
    // without children -- confirm that the parent's reference is dropped
    // somewhere.
    MUTEX_LOCK(&im->mutex);
    im->refcount += 2;
    MUTEX_UNLOCK(&im->mutex);

    // if it's not 1 band, it's an int32 at the moment. 4 uint8 bands.
    *child = (struct ArrowArray){// Data description
                                 .length = length * 4,
                                 .offset = 0,
                                 .null_count = 0,
                                 .n_buffers = 2,
                                 .n_children = 0,
                                 .buffers = child_buffers,
                                 .children = NULL,
                                 .dictionary = NULL,
                                 // Bookkeeping
                                 .release = &release_const_array,
                                 .private_data = im
    };
    children[0] = child;

    // Initialize primitive fields
    // Fixed length arrays are 1 buffer of validity, and the length in pixels.
    // Data is in a child array.
    *array = (struct ArrowArray){// Data description
                                 .length = length,
                                 .offset = 0,
                                 .null_count = 0,
                                 .n_buffers = 1,
                                 .n_children = 1,
                                 .buffers = buffers,
                                 .children = children,
                                 .dictionary = NULL,
                                 // Bookkeeping
                                 .release = &release_const_array,
                                 .private_data = im
    };
    return 0;
}
/*
 * Export im as an Arrow array, picking the exporter by band count.
 * Returns IMAGING_ARROW_INCOMPATIBLE_MODE for images without an Arrow band
 * format, otherwise whatever the chosen exporter returns.
 */
int
export_imaging_array(Imaging im, struct ArrowArray *array) {
    if (im->arrow_band_format[0] == '\0') {
        return IMAGING_ARROW_INCOMPATIBLE_MODE;
    }

    return im->bands == 1 ? export_single_channel_array(im, array)
                          : export_fixed_pixel_array(im, array);
}

48
src/libImaging/Arrow.h Normal file
View File

@ -0,0 +1,48 @@
#include <stdint.h>
#include <assert.h>
// Apache License 2.0.
// Source apache arrow project
// https://arrow.apache.org/docs/format/CDataInterface.html
#ifndef ARROW_C_DATA_INTERFACE
#define ARROW_C_DATA_INTERFACE
#define ARROW_FLAG_DICTIONARY_ORDERED 1
#define ARROW_FLAG_NULLABLE 2
#define ARROW_FLAG_MAP_KEYS_SORTED 4
// Type description half of the Arrow C data interface (see the URL above).
// Arrow.c fills these in through export_named_type().
struct ArrowSchema {
    // Array type description
    const char *format;  // type format string, e.g. "+w:4" for the pixel list
    const char *name;    // field name; Arrow.c uses the band name or "pixel"
    const char *metadata;
    int64_t flags;  // presumably a combination of the ARROW_FLAG_* values
    int64_t n_children;             // number of entries in children
    struct ArrowSchema **children;  // child types, e.g. the list's item type
    struct ArrowSchema *dictionary;

    // Release callback
    // NULL once the schema has been released
    void (*release)(struct ArrowSchema *);
    // Opaque producer-specific data
    void *private_data;
};
// Data half of the Arrow C data interface (see the URL above).
// Arrow.c fills these in when exporting an image's pixel memory.
struct ArrowArray {
    // Array data description
    int64_t length;  // number of elements; Arrow.c exports one per pixel
    int64_t null_count;
    int64_t offset;
    int64_t n_buffers;             // number of entries in buffers
    int64_t n_children;            // number of entries in children
    const void **buffers;          // Arrow.c: [0] is NULL, [1] is pixel data
    struct ArrowArray **children;  // child arrays, used for multiband pixels
    struct ArrowArray *dictionary;

    // Release callback
    // NULL once the array has been released
    void (*release)(struct ArrowArray *);
    // Opaque producer-specific data
    // Arrow.c stores the exported Imaging here
    void *private_data;
};
#endif // ARROW_C_DATA_INTERFACE

View File

@ -20,6 +20,8 @@ extern "C" {
#define M_PI 3.1415926535897932384626433832795
#endif
#include "Arrow.h"
/* -------------------------------------------------------------------- */
/*
@ -104,6 +106,21 @@ struct ImagingMemoryInstance {
/* Virtual methods */
void (*destroy)(Imaging im);
/* arrow */
int refcount; /* Number of arrow arrays that have been allocated */
char band_names[4][3]; /* names of bands, max 2 char + null terminator */
char arrow_band_format[2]; /* single character + null terminator */
int read_only; /* flag for read-only. set for arrow borrowed arrays */
PyObject *arrow_array_capsule; /* upstream arrow array source */
int blocks_count; /* Number of blocks that have been allocated */
int lines_per_block; /* Number of lines in a block have been allocated */
#ifdef Py_GIL_DISABLED
PyMutex mutex;
#endif
};
#define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)])
@ -161,6 +178,7 @@ typedef struct ImagingMemoryArena {
int stats_reallocated_blocks; /* Number of blocks which were actually reallocated
after retrieving */
int stats_freed_blocks; /* Number of freed blocks */
int use_block_allocator; /* don't use arena, use block allocator */
#ifdef Py_GIL_DISABLED
PyMutex mutex;
#endif
@ -174,6 +192,8 @@ extern int
ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max);
extern void
ImagingMemoryClearCache(ImagingMemoryArena arena, int new_size);
extern void
ImagingMemorySetBlockAllocator(ImagingMemoryArena arena, int use_block_allocator);
extern Imaging
ImagingNew(const char *mode, int xsize, int ysize);
@ -187,6 +207,15 @@ ImagingDelete(Imaging im);
extern Imaging
ImagingNewBlock(const char *mode, int xsize, int ysize);
extern Imaging
ImagingNewArrow(
const char *mode,
int xsize,
int ysize,
PyObject *schema_capsule,
PyObject *array_capsule
);
extern Imaging
ImagingNewPrologue(const char *mode, int xsize, int ysize);
extern Imaging
@ -700,6 +729,13 @@ _imaging_seek_pyFd(PyObject *fd, Py_ssize_t offset, int whence);
extern Py_ssize_t
_imaging_tell_pyFd(PyObject *fd);
/* Arrow */
extern int
export_imaging_array(Imaging im, struct ArrowArray *array);
extern int
export_imaging_schema(Imaging im, struct ArrowSchema *schema);
/* Errcodes */
#define IMAGING_CODEC_END 1
#define IMAGING_CODEC_OVERRUN -1
@ -707,6 +743,8 @@ _imaging_tell_pyFd(PyObject *fd);
#define IMAGING_CODEC_UNKNOWN -3
#define IMAGING_CODEC_CONFIG -8
#define IMAGING_CODEC_MEMORY -9
#define IMAGING_ARROW_INCOMPATIBLE_MODE -10
#define IMAGING_ARROW_MEMORY_LAYOUT -11
#include "ImagingUtils.h"
extern UINT8 *clip8_lookups;

View File

@ -58,19 +58,22 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
/* Setup image descriptor */
im->xsize = xsize;
im->ysize = ysize;
im->refcount = 1;
im->type = IMAGING_TYPE_UINT8;
strcpy(im->arrow_band_format, "C");
if (strcmp(mode, "1") == 0) {
/* 1-bit images */
im->bands = im->pixelsize = 1;
im->linesize = xsize;
strcpy(im->band_names[0], "1");
} else if (strcmp(mode, "P") == 0) {
/* 8-bit palette mapped images */
im->bands = im->pixelsize = 1;
im->linesize = xsize;
im->palette = ImagingPaletteNew("RGB");
strcpy(im->band_names[0], "P");
} else if (strcmp(mode, "PA") == 0) {
/* 8-bit palette with alpha */
@ -78,23 +81,36 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->pixelsize = 4; /* store in image32 memory */
im->linesize = xsize * 4;
im->palette = ImagingPaletteNew("RGB");
strcpy(im->band_names[0], "P");
strcpy(im->band_names[1], "X");
strcpy(im->band_names[2], "X");
strcpy(im->band_names[3], "A");
} else if (strcmp(mode, "L") == 0) {
/* 8-bit grayscale (luminance) images */
im->bands = im->pixelsize = 1;
im->linesize = xsize;
strcpy(im->band_names[0], "L");
} else if (strcmp(mode, "LA") == 0) {
/* 8-bit grayscale (luminance) with alpha */
im->bands = 2;
im->pixelsize = 4; /* store in image32 memory */
im->linesize = xsize * 4;
strcpy(im->band_names[0], "L");
strcpy(im->band_names[1], "X");
strcpy(im->band_names[2], "X");
strcpy(im->band_names[3], "A");
} else if (strcmp(mode, "La") == 0) {
/* 8-bit grayscale (luminance) with premultiplied alpha */
im->bands = 2;
im->pixelsize = 4; /* store in image32 memory */
im->linesize = xsize * 4;
strcpy(im->band_names[0], "L");
strcpy(im->band_names[1], "X");
strcpy(im->band_names[2], "X");
strcpy(im->band_names[3], "a");
} else if (strcmp(mode, "F") == 0) {
/* 32-bit floating point images */
@ -102,6 +118,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->pixelsize = 4;
im->linesize = xsize * 4;
im->type = IMAGING_TYPE_FLOAT32;
strcpy(im->arrow_band_format, "f");
strcpy(im->band_names[0], "F");
} else if (strcmp(mode, "I") == 0) {
/* 32-bit integer images */
@ -109,6 +127,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->pixelsize = 4;
im->linesize = xsize * 4;
im->type = IMAGING_TYPE_INT32;
strcpy(im->arrow_band_format, "i");
strcpy(im->band_names[0], "I");
} else if (strcmp(mode, "I;16") == 0 || strcmp(mode, "I;16L") == 0 ||
strcmp(mode, "I;16B") == 0 || strcmp(mode, "I;16N") == 0) {
@ -118,12 +138,18 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->pixelsize = 2;
im->linesize = xsize * 2;
im->type = IMAGING_TYPE_SPECIAL;
strcpy(im->arrow_band_format, "s");
strcpy(im->band_names[0], "I");
} else if (strcmp(mode, "RGB") == 0) {
/* 24-bit true colour images */
im->bands = 3;
im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "R");
strcpy(im->band_names[1], "G");
strcpy(im->band_names[2], "B");
strcpy(im->band_names[3], "X");
} else if (strcmp(mode, "BGR;15") == 0) {
/* EXPERIMENTAL */
@ -132,6 +158,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->pixelsize = 2;
im->linesize = (xsize * 2 + 3) & -4;
im->type = IMAGING_TYPE_SPECIAL;
/* not allowing arrow due to line length packing */
strcpy(im->arrow_band_format, "");
} else if (strcmp(mode, "BGR;16") == 0) {
/* EXPERIMENTAL */
@ -140,6 +168,8 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->pixelsize = 2;
im->linesize = (xsize * 2 + 3) & -4;
im->type = IMAGING_TYPE_SPECIAL;
/* not allowing arrow due to line length packing */
strcpy(im->arrow_band_format, "");
} else if (strcmp(mode, "BGR;24") == 0) {
/* EXPERIMENTAL */
@ -148,32 +178,54 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->pixelsize = 3;
im->linesize = (xsize * 3 + 3) & -4;
im->type = IMAGING_TYPE_SPECIAL;
/* not allowing arrow due to line length packing */
strcpy(im->arrow_band_format, "");
} else if (strcmp(mode, "RGBX") == 0) {
/* 32-bit true colour images with padding */
im->bands = im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "R");
strcpy(im->band_names[1], "G");
strcpy(im->band_names[2], "B");
strcpy(im->band_names[3], "X");
} else if (strcmp(mode, "RGBA") == 0) {
/* 32-bit true colour images with alpha */
im->bands = im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "R");
strcpy(im->band_names[1], "G");
strcpy(im->band_names[2], "B");
strcpy(im->band_names[3], "A");
} else if (strcmp(mode, "RGBa") == 0) {
/* 32-bit true colour images with premultiplied alpha */
im->bands = im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "R");
strcpy(im->band_names[1], "G");
strcpy(im->band_names[2], "B");
strcpy(im->band_names[3], "a");
} else if (strcmp(mode, "CMYK") == 0) {
/* 32-bit colour separation */
im->bands = im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "C");
strcpy(im->band_names[1], "M");
strcpy(im->band_names[2], "Y");
strcpy(im->band_names[3], "K");
} else if (strcmp(mode, "YCbCr") == 0) {
/* 24-bit video format */
im->bands = 3;
im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "Y");
strcpy(im->band_names[1], "Cb");
strcpy(im->band_names[2], "Cr");
strcpy(im->band_names[3], "X");
} else if (strcmp(mode, "LAB") == 0) {
/* 24-bit color, luminance, + 2 color channels */
@ -181,6 +233,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->bands = 3;
im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "L");
strcpy(im->band_names[1], "a");
strcpy(im->band_names[2], "b");
strcpy(im->band_names[3], "X");
} else if (strcmp(mode, "HSV") == 0) {
/* 24-bit color, luminance, + 2 color channels */
@ -188,6 +244,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
im->bands = 3;
im->pixelsize = 4;
im->linesize = xsize * 4;
strcpy(im->band_names[0], "H");
strcpy(im->band_names[1], "S");
strcpy(im->band_names[2], "V");
strcpy(im->band_names[3], "X");
} else {
free(im);
@ -218,6 +278,7 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) {
break;
}
// UNDONE -- not accurate for arrow
MUTEX_LOCK(&ImagingDefaultArena.mutex);
ImagingDefaultArena.stats_new_count += 1;
MUTEX_UNLOCK(&ImagingDefaultArena.mutex);
@ -238,8 +299,18 @@ ImagingDelete(Imaging im) {
return;
}
MUTEX_LOCK(&im->mutex);
im->refcount--;
if (im->refcount > 0) {
MUTEX_UNLOCK(&im->mutex);
return;
}
MUTEX_UNLOCK(&im->mutex);
if (im->palette) {
ImagingPaletteDelete(im->palette);
im->palette = NULL;
}
if (im->destroy) {
@ -270,6 +341,7 @@ struct ImagingMemoryArena ImagingDefaultArena = {
0,
0,
0, // Stats
0, // use_block_allocator
#ifdef Py_GIL_DISABLED
{0},
#endif
@ -302,6 +374,11 @@ ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) {
return 1;
}
/*
 * Choose the allocation strategy recorded on an arena.
 *
 * The flag is stored exactly as given, without validation. ImagingNew() and
 * ImagingNewDirty() read it from ImagingDefaultArena and route through
 * ImagingNewBlock() when it is non-zero.
 */
void
ImagingMemorySetBlockAllocator(ImagingMemoryArena arena, int use_block_allocator) {
    arena->use_block_allocator = use_block_allocator;
}
void
ImagingMemoryClearCache(ImagingMemoryArena arena, int new_size) {
while (arena->blocks_cached > new_size) {
@ -396,11 +473,13 @@ ImagingAllocateArray(Imaging im, ImagingMemoryArena arena, int dirty, int block_
if (lines_per_block == 0) {
lines_per_block = 1;
}
im->lines_per_block = lines_per_block;
blocks_count = (im->ysize + lines_per_block - 1) / lines_per_block;
// printf("NEW size: %dx%d, ls: %d, lpb: %d, blocks: %d\n",
// im->xsize, im->ysize, aligned_linesize, lines_per_block, blocks_count);
/* One extra pointer is always NULL */
im->blocks_count = blocks_count;
im->blocks = calloc(sizeof(*im->blocks), blocks_count + 1);
if (!im->blocks) {
return (Imaging)ImagingError_MemoryError();
@ -487,6 +566,58 @@ ImagingAllocateBlock(Imaging im) {
return im;
}
/* Borrowed Arrow Storage Type */
/* --------------------------- */
/* Don't allocate the image. */
/*
 * Destroy hook for images whose storage is borrowed from an Arrow array.
 *
 * The image does not own the pixel memory; it only holds a reference to the
 * array capsule. Dropping that reference lets the capsule's own destructor
 * deal with the Arrow data.
 */
static void
ImagingDestroyArrow(Imaging im) {
    PyObject *capsule = im->arrow_array_capsule;

    if (capsule == NULL) {
        /* Nothing was borrowed, or it has already been released. */
        return;
    }

    Py_DECREF(capsule);
    im->arrow_array_capsule = NULL;
}
/*
 * Point an image's row table at memory owned by an Arrow array.
 *
 * im              image descriptor whose image[] row pointers are filled in.
 * external_array  the Arrow array to borrow from. If it has exactly one
 *                 child, the child's data buffer is used.
 * offset_width    number of bytes represented by one unit of the Arrow
 *                 array's `offset` field.
 * arrow_capsule   Python object keeping the Arrow array alive. A new
 *                 reference is taken and released by ImagingDestroyArrow().
 *
 * Returns im on success. On failure a ValueError is set, NULL is returned,
 * and im is left unmodified (the caller still owns it).
 *
 * The image is marked read-only because the memory belongs to Arrow.
 *
 * NOTE(review): only the offset of the array that owns the data buffer is
 * applied. A non-zero offset on a nested parent array is not taken into
 * account here -- confirm callers reject such arrays.
 */
Imaging
ImagingBorrowArrow(
    Imaging im,
    struct ArrowArray *external_array,
    int offset_width,
    PyObject *arrow_capsule
) {
    Py_ssize_t y, i;
    char *borrowed_buffer;
    struct ArrowArray *arr = external_array;

    if (arr->n_children == 1) {
        /* Nested layout: the pixel bytes live in the single child array. */
        if (!arr->children || !arr->children[0]) {
            return (Imaging)ImagingError_ValueError(
                "Arrow Array, child array is missing"
            );
        }
        arr = arr->children[0];
    }

    /* buffer 0 is the null list, buffer 1 is the data. Validate the data
     * pointer *before* applying the offset; otherwise NULL plus a non-zero
     * offset would pass as a valid address. */
    if (arr->n_buffers != 2 || !arr->buffers || !arr->buffers[1]) {
        return (Imaging)ImagingError_ValueError(
            "Arrow Array, exactly 2 buffers required"
        );
    }
    borrowed_buffer = (char *)arr->buffers[1] + (offset_width * arr->offset);

    /* Rows are laid out back to back, linesize bytes apart. */
    for (y = i = 0; y < im->ysize; y++) {
        im->image[y] = borrowed_buffer + i;
        i += im->linesize;
    }
    im->read_only = 1;

    /* Keep the Arrow array alive for as long as the image exists. */
    Py_INCREF(arrow_capsule);
    im->arrow_array_capsule = arrow_capsule;
    im->destroy = ImagingDestroyArrow;

    return im;
}
/* --------------------------------------------------------------------
* Create a new, internally allocated, image.
*/
@ -529,11 +660,17 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) {
/*
 * Create a new image of the given mode and size.
 *
 * When the default arena is configured for block allocation the image comes
 * from ImagingNewBlock(); otherwise it is created by ImagingNewInternal()
 * with the dirty flag cleared.
 */
Imaging
ImagingNew(const char *mode, int xsize, int ysize) {
    return ImagingDefaultArena.use_block_allocator
               ? ImagingNewBlock(mode, xsize, ysize)
               : ImagingNewInternal(mode, xsize, ysize, 0);
}
/*
 * Same as ImagingNew(), except that the non-block path calls
 * ImagingNewInternal() with the dirty flag set. The block-allocator path is
 * identical to ImagingNew().
 */
Imaging
ImagingNewDirty(const char *mode, int xsize, int ysize) {
    return ImagingDefaultArena.use_block_allocator
               ? ImagingNewBlock(mode, xsize, ysize)
               : ImagingNewInternal(mode, xsize, ysize, 1);
}
@ -558,6 +695,66 @@ ImagingNewBlock(const char *mode, int xsize, int ysize) {
return NULL;
}
/*
 * Create an image that borrows its pixel storage from an Arrow array.
 *
 * mode, xsize, ysize  requested image mode and dimensions.
 * schema_capsule      PyCapsule named "arrow_schema" holding an ArrowSchema.
 * array_capsule       PyCapsule named "arrow_array" holding an ArrowArray.
 *                     On success the image keeps a reference to it.
 *
 * Two Arrow layouts are accepted, both requiring one Arrow element per pixel:
 *   1. A flat array: either format "I" (uint32) for a multi-band mode stored
 *      in 4 bytes per pixel, or the image's own arrow_band_format for a
 *      single-band mode.
 *   2. A fixed-size list of 4 ("+w:4") whose child is uint8 ("C"), for modes
 *      stored in 4 bytes per pixel with 8-bit bands.
 *
 * Returns the new image, or NULL on failure. When a capsule is invalid, the
 * size is negative, the schema has no format string, or borrowing fails, a
 * Python exception has been set. When the layout simply does not match, NULL
 * is returned without an exception being set here -- the caller is
 * presumably expected to raise (TODO confirm).
 */
Imaging
ImagingNewArrow(
    const char *mode,
    int xsize,
    int ysize,
    PyObject *schema_capsule,
    PyObject *array_capsule
) {
    Imaging im;
    int64_t pixels;
    int flat_match;
    struct ArrowSchema *schema;
    struct ArrowArray *external_array;

    /* PyCapsule_GetPointer returns NULL and sets an exception when the
     * object is not a capsule with the expected name; never dereference
     * the result unchecked. */
    schema = (struct ArrowSchema *)PyCapsule_GetPointer(schema_capsule, "arrow_schema");
    if (!schema) {
        return NULL;
    }
    external_array =
        (struct ArrowArray *)PyCapsule_GetPointer(array_capsule, "arrow_array");
    if (!external_array) {
        return NULL;
    }

    if (xsize < 0 || ysize < 0) {
        return (Imaging)ImagingError_ValueError("bad image size");
    }
    if (!schema->format) {
        return (Imaging)ImagingError_ValueError("Arrow schema has no format string");
    }

    im = ImagingNewPrologue(mode, xsize, ysize);
    if (!im) {
        return NULL;
    }

    pixels = (int64_t)xsize * (int64_t)ysize;

    /* Layout 1: flat array, one Arrow element per pixel. */
    flat_match =
        (strcmp(schema->format, "I") == 0  /* uint32 elements */
         && im->pixelsize == 4             /* 4 bytes of storage per pixel */
         && im->bands >= 2)                /* multi-band 32-bit storage mode */
        || (strcmp(schema->format, im->arrow_band_format) == 0 /* same type */
            && im->bands == 1);                                /* single band */
    if (flat_match && pixels == external_array->length) {
        /* Each Arrow element is pixelsize bytes wide. */
        if (ImagingBorrowArrow(im, external_array, im->pixelsize, array_capsule)) {
            return im;
        }
    }

    /* Layout 2: fixed-size list of four uint8 values per pixel. */
    if (strcmp(schema->format, "+w:4") == 0            /* 4 up array */
        && im->pixelsize == 4                          /* storage as 32 bpp */
        && schema->n_children > 0                      /* schema is well formed */
        && schema->children                            /* schema is well formed */
        && schema->children[0]                         /* schema is well formed */
        && schema->children[0]->format                 /* schema is well formed */
        && strcmp(schema->children[0]->format, "C") == 0 /* uint8 children */
        && strcmp(im->arrow_band_format, "C") == 0     /* 8-bit bands */
        && pixels == external_array->length            /* expected length */
        && external_array->n_children == 1             /* array is well formed */
        && external_array->children                    /* array is well formed */
        && external_array->children[0]                 /* array is well formed */
        && 4 * pixels == external_array->children[0]->length) {
        /* Child elements are single bytes, so offsets count bytes. */
        if (ImagingBorrowArrow(im, external_array, 1, array_capsule)) {
            return im;
        }
    }

    /* No compatible layout, or borrowing failed: discard the descriptor. */
    ImagingDelete(im);
    return NULL;
}
Imaging
ImagingNew2Dirty(const char *mode, Imaging imOut, Imaging imIn) {
/* allocate or validate output image */