From 85212dbbb6400255a0522a99ce7fa18a40ea3f81 Mon Sep 17 00:00:00 2001 From: wiredfool Date: Sat, 19 Jul 2025 16:55:52 +0200 Subject: [PATCH] Add image band metadata for the 4 channel images --- Tests/test_pyarrow.py | 27 +++++++++++++ src/libImaging/Arrow.c | 86 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 112 insertions(+), 1 deletion(-) diff --git a/Tests/test_pyarrow.py b/Tests/test_pyarrow.py index 8dad94fe0..a69504e78 100644 --- a/Tests/test_pyarrow.py +++ b/Tests/test_pyarrow.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json from typing import Any, NamedTuple import pytest @@ -244,3 +245,29 @@ def test_from_int32array(mode: str, data_tp: DataShape, mask: list[int] | None) img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE) _test_img_equals_int32_pyarray(img, arr, mask, elts_per_pixel) + + +@pytest.mark.parametrize( + "mode, metadata", + ( + ("LA", ["L", "X", "X", "A"]), + ("RGB", ["R", "G", "B", "X"]), + ("RGBX", ["R", "G", "B", "X"]), + ("RGBA", ["R", "G", "B", "A"]), + ("CMYK", ["C", "M", "Y", "K"]), + ("YCbCr", ["Y", "Cb", "Cr", "X"]), + ("HSV", ["H", "S", "V", "X"]), + ), +) +def test_image_metadata(mode: str, metadata: list[str]) -> None: + img = hopper(mode) + + arr = pyarrow.array(img) # type: ignore[call-overload] + + assert arr.type.field(0).metadata + assert arr.type.field(0).metadata[b"image"] + + parsed_metadata = json.loads(arr.type.field(0).metadata[b"image"].decode("utf8")) + + assert "bands" in parsed_metadata + assert parsed_metadata["bands"] == metadata diff --git a/src/libImaging/Arrow.c b/src/libImaging/Arrow.c index ccafe33b9..2ecec9b29 100644 --- a/src/libImaging/Arrow.c +++ b/src/libImaging/Arrow.c @@ -55,6 +55,77 @@ ReleaseExportedSchema(struct ArrowSchema *array) { // Mark array released array->release = NULL; } +char * +image_band_json(Imaging im) { + char *format = "{\"bands\": [\"%s\", \"%s\", \"%s\", \"%s\"]}"; + char *json; + // Bands can be 4 bands * 2 characters each + int len = strlen(format) + 8 + 1; + int err; + + json = calloc(1, len); + + if (!json) { + return NULL; + } + + err = PyOS_snprintf( + json, + len, + format, + im->band_names[0], + im->band_names[1], + im->band_names[2], + im->band_names[3] + ); + if (err < 0) { + return NULL; + } + return json; +} + +char * +assemble_metadata(const char *band_json) { + /* format is + int32: number of key/value pairs (noted N below) + int32: byte length of key 0 + key 0 (not null-terminated) + int32: byte length of value 0 + value 0 (not null-terminated) + ... + int32: byte length of key N - 1 + key N - 1 (not null-terminated) + int32: byte length of value N - 1 + value N - 1 (not null-terminated) + */ + const char *key = "image"; + INT32 key_len = strlen(key); + INT32 band_json_len = strlen(band_json); + + char *buf; + INT32 *dest_int; + char *dest; + + buf = calloc(1, key_len + band_json_len + 4 + 1 * 8); + if (!buf) { + return NULL; + } + + dest_int = (void *)buf; + + dest_int[0] = 1; + dest_int[1] = key_len; + dest_int += 2; + dest = (void *)dest_int; + memcpy(dest, key, key_len); + dest += key_len; + dest_int = (void *)dest; + dest_int[0] = band_json_len; + dest_int += 1; + memcpy(dest_int, band_json, band_json_len); + + return buf; +} int export_named_type(struct ArrowSchema *schema, char *format, char *name) { @@ -95,6 +166,8 @@ export_named_type(struct ArrowSchema *schema, char *format, char *name) { int export_imaging_schema(Imaging im, struct ArrowSchema *schema) { int retval = 0; + char *metadata; + char *band_json; if (strcmp(im->arrow_band_format, "") == 0) { return IMAGING_ARROW_INCOMPATIBLE_MODE; @@ -117,13 +190,24 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) { schema->n_children = 1; schema->children = calloc(1, sizeof(struct ArrowSchema *)); schema->children[0] = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema)); - retval = export_named_type(schema->children[0], im->arrow_band_format, "pixel"); + retval = export_named_type(schema->children[0], im->arrow_band_format, im->mode); if (retval != 0) { free(schema->children[0]); free(schema->children); schema->release(schema); return retval; } + + // band related metadata + band_json = image_band_json(im); + if (band_json) { + // adding the metadata to the child array. + // Accessible in pyarrow via pa.array(img).type.field(0).metadata + // adding it to the top level is not accessible. + schema->children[0]->metadata = assemble_metadata(band_json); + free(band_json); + } + return 0; }