From 5890e40365b12ef5b965eaa570bc426a69cb486e Mon Sep 17 00:00:00 2001
From: wiredfool
Date: Sat, 24 Aug 2024 13:41:23 +0100
Subject: [PATCH] WIP - Initial Pillow->Arrow support

* Fixed format, only for 4 channel images
---
Review notes (this section is ignored by `git am`):

* Lifetime: releasing an exported array no longer destroys an image that is
  still owned elsewhere. ImagingDelete records a pending deletion while arrays
  are borrowed, and the last release completes it.
* export_imaging_array now returns an error code. It rejects images whose
  pixelsize is not 4 (the schema is fixed to uint32) and images that have
  neither a block nor a blocks array, and it checks its allocation.
* The element count is computed in int64_t instead of int.
* The capsule helpers check malloc/PyCapsule_New results, are static, and are
  registered as METH_NOARGS with the matching two-argument signature.
* Image.__arrow_c_array__ is annotated with the builtin tuple.
* The unused ReleaseExportedSchema template replaces release_uint32_type as
  the schema release callback; the commented-out stub was dropped.
* NOTE(review): arrow_borrow is a plain int. A consumer may call release from
  a thread that does not hold the GIL -- confirm whether that needs locking.
* NOTE(review): exporting blocks[0].ptr assumes rows are stored back to back
  in a block -- confirm for arenas with a non-default alignment.
* NOTE(review): the system header names in Arrow.h and the whitespace of
  context lines were reconstructed; the index hashes below are stale. Apply
  with `git apply --ignore-whitespace` if a hunk is rejected.

 setup.py                 |   1 +
 src/PIL/Image.py         |  10 +++
 src/_imaging.c           |  88 +++++++++++++++++++++
 src/libImaging/Arrow.c   | 166 +++++++++++++++++++++++++++++++++++++++
 src/libImaging/Arrow.h   |  48 +++++++++++
 src/libImaging/Imaging.h |  20 +++++
 src/libImaging/Storage.c |  10 +++
 7 files changed, 343 insertions(+)
 create mode 100644 src/libImaging/Arrow.c
 create mode 100644 src/libImaging/Arrow.h

diff --git a/setup.py b/setup.py
index a85731db9..80289e0c2 100644
--- a/setup.py
+++ b/setup.py
@@ -64,6 +64,7 @@ _IMAGING = ("decode", "encode", "map", "display", "outline", "path")
 _LIB_IMAGING = (
     "Access",
     "AlphaComposite",
+    "Arrow",
     "Resample",
     "Reduce",
     "Bands",
diff --git a/src/PIL/Image.py b/src/PIL/Image.py
index dff3d063b..963b00fe2 100644
--- a/src/PIL/Image.py
+++ b/src/PIL/Image.py
@@ -748,6 +748,16 @@ class Image:
         new["shape"], new["typestr"] = _conv_type_shape(self)
         return new
 
+    def __arrow_c_schema__(self) -> object:
+        """Return the Arrow schema capsule produced by the core image."""
+        return self.im.__arrow_c_schema__()
+
+    def __arrow_c_array__(
+        self, requested_schema: object | None = None
+    ) -> tuple[object, object]:
+        """Return ``(schema capsule, array capsule)``; the request is ignored."""
+        return (self.im.__arrow_c_schema__(), self.im.__arrow_c_array__())
+
     def __getstate__(self) -> list[Any]:
         im_data = self.tobytes()  # load image first
         return [self.info, self.mode, self.size, self.getpalette(), im_data]
diff --git a/src/_imaging.c b/src/_imaging.c
index 5d6d97bed..eccdcb9e8 100644
--- a/src/_imaging.c
+++ b/src/_imaging.c
@@ -89,6 +89,7 @@
 #endif
 
 #include "libImaging/Imaging.h"
+#include "libImaging/Arrow.h"
 
 #define _USE_MATH_DEFINES
 #include <math.h>
@@ -223,6 +224,89 @@ PyImaging_GetBuffer(PyObject *buffer, Py_buffer *view) {
     return PyObject_GetBuffer(buffer, view, PyBUF_SIMPLE);
 }
 
+/* -------------------------------------------------------------------- */
+/* Arrow HANDLING                                                       */
+/* -------------------------------------------------------------------- */
+
+/* Capsule destructor: release the schema if the consumer has not, then free it. */
+static void
+ReleaseArrowSchemaPyCapsule(PyObject *capsule) {
+    struct ArrowSchema *schema =
+        (struct ArrowSchema *)PyCapsule_GetPointer(capsule, "arrow_schema");
+    if (schema == NULL) {
+        /* Lookup failed (e.g. name mismatch); report it and free nothing. */
+        PyErr_WriteUnraisable(capsule);
+        return;
+    }
+    if (schema->release != NULL) {
+        schema->release(schema);
+    }
+    free(schema);
+}
+
+/* Return a new "arrow_schema" capsule holding the uint32 schema. */
+static PyObject *
+ExportArrowSchemaPyCapsule(ImagingObject *self, PyObject *args) {
+    PyObject *capsule;
+    struct ArrowSchema *schema = calloc(1, sizeof(*schema));
+    if (schema == NULL) {
+        return PyErr_NoMemory();
+    }
+    export_uint32_type(schema);
+    capsule = PyCapsule_New(schema, "arrow_schema", ReleaseArrowSchemaPyCapsule);
+    if (capsule == NULL) {
+        /* The capsule destructor will never run, so clean up here. */
+        schema->release(schema);
+        free(schema);
+    }
+    return capsule;
+}
+
+/* Capsule destructor: release the array if the consumer has not, then free it. */
+static void
+ReleaseArrowArrayPyCapsule(PyObject *capsule) {
+    struct ArrowArray *array =
+        (struct ArrowArray *)PyCapsule_GetPointer(capsule, "arrow_array");
+    if (array == NULL) {
+        /* Lookup failed (e.g. name mismatch); report it and free nothing. */
+        PyErr_WriteUnraisable(capsule);
+        return;
+    }
+    if (array->release != NULL) {
+        array->release(array);
+    }
+    free(array);
+}
+
+/* Return a new "arrow_array" capsule that borrows the image's pixel data. */
+static PyObject *
+ExportArrowArrayPyCapsule(ImagingObject *self, PyObject *args) {
+    PyObject *capsule;
+    int err;
+    struct ArrowArray *array = calloc(1, sizeof(*array));
+    if (array == NULL) {
+        return PyErr_NoMemory();
+    }
+    err = export_imaging_array(self->image, array);
+    if (err != 0) {
+        free(array);
+        if (err == IMAGING_ARROW_MEMORY_ERROR) {
+            return PyErr_NoMemory();
+        }
+        PyErr_SetString(
+            PyExc_ValueError, "image cannot be exported as an Arrow array"
+        );
+        return NULL;
+    }
+    capsule = PyCapsule_New(array, "arrow_array", ReleaseArrowArrayPyCapsule);
+    if (capsule == NULL) {
+        /* The capsule destructor will never run, so clean up here. */
+        array->release(array);
+        free(array);
+    }
+    return capsule;
+}
+
 /* -------------------------------------------------------------------- */
 /* EXCEPTION REROUTING                                                  */
 /* -------------------------------------------------------------------- */
@@ -3678,6 +3762,10 @@ static struct PyMethodDef methods[] = {
     /* Misc. */
     {"save_ppm", (PyCFunction)_save_ppm, METH_VARARGS},
 
+    /* arrow */
+    {"__arrow_c_schema__", (PyCFunction)ExportArrowSchemaPyCapsule, METH_NOARGS},
+    {"__arrow_c_array__", (PyCFunction)ExportArrowArrayPyCapsule, METH_NOARGS},
+
     {NULL, NULL} /* sentinel */
 };
 
diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c
new file mode 100644
index 000000000..3a6d9dd60
--- /dev/null
+++ b/src/libImaging/Arrow.c
@@ -0,0 +1,166 @@
+#include "Arrow.h"
+#include "Imaging.h"
+
+/* Release callback for the schemas exported below. */
+static void
+ReleaseExportedSchema(struct ArrowSchema *schema) {
+    int64_t i;
+
+    if (schema->release == NULL) {
+        /* Already released */
+        return;
+    }
+
+    /* Release children */
+    for (i = 0; i < schema->n_children; ++i) {
+        struct ArrowSchema *child = schema->children[i];
+        if (child->release != NULL) {
+            child->release(child);
+        }
+    }
+
+    /* Release dictionary */
+    if (schema->dictionary != NULL && schema->dictionary->release != NULL) {
+        schema->dictionary->release(schema->dictionary);
+    }
+
+    /* format and name are string literals, so there is nothing to free. */
+
+    /* Mark released */
+    schema->release = NULL;
+}
+
+/* Fill *schema with a childless, non-nullable "I" (uint32) type description. */
+void
+export_uint32_type(struct ArrowSchema *schema) {
+    *schema = (struct ArrowSchema){
+        /* Type description */
+        .format = "I",
+        .name = "",
+        .metadata = NULL,
+        .flags = 0,
+        .n_children = 0,
+        .children = NULL,
+        .dictionary = NULL,
+        /* Bookkeeping */
+        .release = &ReleaseExportedSchema,
+    };
+}
+
+/* Release callback for export_uint32_array: frees the data buffer as well. */
+static void
+release_uint32_array(struct ArrowArray *array) {
+    /* Free the data buffer and the buffers array */
+    free((void *)array->buffers[1]);
+    free(array->buffers);
+    array->buffers = NULL;
+    /* Mark released */
+    array->release = NULL;
+}
+
+/* Wrap nitems uint32 values at data in *array. Ownership of data passes to the
+   array: its release callback calls free() on it. Returns 0 on success, or
+   IMAGING_ARROW_MEMORY_ERROR with *array left released. */
+int
+export_uint32_array(const uint32_t *data, int64_t nitems, struct ArrowArray *array) {
+    /* Allocate list of buffers */
+    const void **buffers = malloc(sizeof(*buffers) * 2);
+    if (buffers == NULL) {
+        array->release = NULL;
+        return IMAGING_ARROW_MEMORY_ERROR;
+    }
+    buffers[0] = NULL; /* no nulls, null bitmap can be omitted */
+    buffers[1] = data;
+
+    *array = (struct ArrowArray){
+        /* Data description */
+        .length = nitems,
+        .null_count = 0,
+        .offset = 0,
+        .n_buffers = 2,
+        .n_children = 0,
+        .buffers = buffers,
+        .children = NULL,
+        .dictionary = NULL,
+        /* Bookkeeping */
+        .release = &release_uint32_array,
+    };
+    return 0;
+}
+
+/* Release callback for export_imaging_array. */
+static void
+release_const_array(struct ArrowArray *array) {
+    Imaging im = (Imaging)array->private_data;
+
+    /* Only the list of buffer pointers belongs to the array; the pixel data
+       stays with the image. */
+    free(array->buffers);
+    array->buffers = NULL;
+    array->private_data = NULL;
+    /* Mark released */
+    array->release = NULL;
+
+    im->arrow_borrow--;
+    if (im->arrow_borrow == 0 && im->arrow_delete_pending) {
+        /* ImagingDelete was deferred while the data was borrowed. */
+        ImagingDelete(im);
+    }
+}
+
+/* Export the pixel data of im as a zero-copy array of uint32 values.
+
+   Returns 0 on success. On failure *array is left released (release == NULL)
+   and one of the IMAGING_ARROW_* codes is returned. */
+int
+export_imaging_array(Imaging im, struct ArrowArray *array) {
+    const void **buffers;
+    const void *data;
+    int64_t lines = im->ysize;
+
+    /* Keep the output in the released state until the export succeeds. */
+    array->release = NULL;
+
+    /* The schema is fixed to uint32, so each pixel must be 4 bytes wide. */
+    if (im->pixelsize != 4) {
+        return IMAGING_ARROW_INCOMPATIBLE_MODE;
+    }
+
+    if (im->block) {
+        data = im->block;
+    } else if (im->blocks && im->blocks[0].ptr) {
+        data = im->blocks[0].ptr;
+    } else {
+        return IMAGING_ARROW_MEMORY_LAYOUT;
+    }
+
+    /* undone -- for now, only the first block of a multi-block image */
+    if (im->lines_per_block && im->lines_per_block < im->ysize) {
+        lines = im->lines_per_block;
+    }
+
+    /* Allocate list of buffers */
+    buffers = malloc(sizeof(*buffers) * 2);
+    if (buffers == NULL) {
+        return IMAGING_ARROW_MEMORY_ERROR;
+    }
+    buffers[0] = NULL; /* no nulls, null bitmap can be omitted */
+    buffers[1] = data;
+
+    im->arrow_borrow++;
+    *array = (struct ArrowArray){
+        /* Data description */
+        .length = (int64_t)im->xsize * lines,
+        .null_count = 0,
+        .offset = 0,
+        .n_buffers = 2,
+        .n_children = 0,
+        .buffers = buffers,
+        .children = NULL,
+        .dictionary = NULL,
+        /* Bookkeeping */
+        .release = &release_const_array,
+        .private_data = im,
+    };
+    return 0;
+}
diff --git a/src/libImaging/Arrow.h b/src/libImaging/Arrow.h
new file mode 100644
index 000000000..758be27b0
--- /dev/null
+++ b/src/libImaging/Arrow.h
@@ -0,0 +1,48 @@
+#include <stdint.h>
+#include <stdlib.h>
+
+// Apache License 2.0.
+// Source apache arrow project
+// https://arrow.apache.org/docs/format/CDataInterface.html
+
+#ifndef ARROW_C_DATA_INTERFACE
+#define ARROW_C_DATA_INTERFACE
+
+#define ARROW_FLAG_DICTIONARY_ORDERED 1
+#define ARROW_FLAG_NULLABLE 2
+#define ARROW_FLAG_MAP_KEYS_SORTED 4
+
+struct ArrowSchema {
+  // Array type description
+  const char* format;
+  const char* name;
+  const char* metadata;
+  int64_t flags;
+  int64_t n_children;
+  struct ArrowSchema** children;
+  struct ArrowSchema* dictionary;
+
+  // Release callback
+  void (*release)(struct ArrowSchema*);
+  // Opaque producer-specific data
+  void* private_data;
+};
+
+struct ArrowArray {
+  // Array data description
+  int64_t length;
+  int64_t null_count;
+  int64_t offset;
+  int64_t n_buffers;
+  int64_t n_children;
+  const void** buffers;
+  struct ArrowArray** children;
+  struct ArrowArray* dictionary;
+
+  // Release callback
+  void (*release)(struct ArrowArray*);
+  // Opaque producer-specific data
+  void* private_data;
+};
+
+#endif  // ARROW_C_DATA_INTERFACE
diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h
index 31052c68a..a47ebedf0 100644
--- a/src/libImaging/Imaging.h
+++ b/src/libImaging/Imaging.h
@@ -104,6 +104,12 @@ struct ImagingMemoryInstance {
 
     /* Virtual methods */
     void (*destroy)(Imaging im);
+
+    /* arrow */
+    int arrow_borrow;         /* Number of exported arrow arrays not yet released */
+    int arrow_delete_pending; /* ImagingDelete was deferred by a borrow */
+    int blocks_count;         /* Number of blocks that have been allocated */
+    int lines_per_block;      /* Number of lines in each allocated block */
 };
 
 #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)])
@@ -702,6 +708,20 @@ _imaging_seek_pyFd(PyObject *fd, Py_ssize_t offset, int whence);
 extern Py_ssize_t
 _imaging_tell_pyFd(PyObject *fd);
 
+/* Arrow */
+
+#include "Arrow.h"
+
+/* Return codes of export_imaging_array */
+#define IMAGING_ARROW_INCOMPATIBLE_MODE 1
+#define IMAGING_ARROW_MEMORY_LAYOUT 2
+#define IMAGING_ARROW_MEMORY_ERROR 3
+
+extern int
+export_imaging_array(Imaging im, struct ArrowArray *array);
+extern void
+export_uint32_type(struct ArrowSchema *schema);
+
 /* Errcodes */
 #define IMAGING_CODEC_END 1
 #define IMAGING_CODEC_OVERRUN -1
diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c
index 522e9f375..1e3d6fce0 100644
--- a/src/libImaging/Storage.c
+++ b/src/libImaging/Storage.c
@@ -240,6 +240,14 @@ ImagingDelete(Imaging im) {
 
     if (im->palette) {
         ImagingPaletteDelete(im->palette);
+        im->palette = NULL;
+    }
+
+    if (im->arrow_borrow > 0) {
+        /* Exported Arrow arrays still point at the pixel data. Defer the
+           deletion; the last array release completes it. */
+        im->arrow_delete_pending = 1;
+        return;
     }
 
     if (im->destroy) {
@@ -396,11 +404,13 @@ ImagingAllocateArray(Imaging im, ImagingMemoryArena arena, int dirty, int block_
     if (lines_per_block == 0) {
         lines_per_block = 1;
     }
+    im->lines_per_block = lines_per_block;
     blocks_count = (im->ysize + lines_per_block - 1) / lines_per_block;
     // printf("NEW size: %dx%d, ls: %d, lpb: %d, blocks: %d\n",
     //        im->xsize, im->ysize, aligned_linesize, lines_per_block, blocks_count);
 
     /* One extra pointer is always NULL */
+    im->blocks_count = blocks_count;
     im->blocks = calloc(sizeof(*im->blocks), blocks_count + 1);
     if (!im->blocks) {
         return (Imaging)ImagingError_MemoryError();