Add image band metadata for the 4 channel images

This commit is contained in:
wiredfool 2025-07-19 16:55:52 +02:00
parent d56032047d
commit 85212dbbb6
2 changed files with 112 additions and 1 deletions

View File

@ -1,5 +1,6 @@
from __future__ import annotations
import json
from typing import Any, NamedTuple
import pytest
@ -244,3 +245,29 @@ def test_from_int32array(mode: str, data_tp: DataShape, mask: list[int] | None)
img = Image.fromarrow(arr, mode, TEST_IMAGE_SIZE)
_test_img_equals_int32_pyarray(img, arr, mask, elts_per_pixel)
@pytest.mark.parametrize(
    "mode, metadata",
    (
        ("LA", ["L", "X", "X", "A"]),
        ("RGB", ["R", "G", "B", "X"]),
        ("RGBX", ["R", "G", "B", "X"]),
        ("RGBA", ["R", "G", "B", "A"]),
        ("CMYK", ["C", "M", "Y", "K"]),
        ("YCbCr", ["Y", "Cb", "Cr", "X"]),
        ("HSV", ["H", "S", "V", "X"]),
    ),
)
def test_image_metadata(mode: str, metadata: list[str]) -> None:
    """Check the band names published in the exported Arrow metadata.

    The image returned by ``hopper(mode)`` is converted with
    ``pyarrow.array``; the first field of the resulting type must carry an
    ``b"image"`` metadata entry whose JSON payload lists ``metadata`` under
    the ``"bands"`` key.
    """
    img = hopper(mode)
    arr = pyarrow.array(img)  # type: ignore[call-overload]

    # The band description is looked up on the first (child) field of the
    # array type, not on the array type itself.
    field_metadata = arr.type.field(0).metadata
    assert field_metadata

    image_entry = field_metadata[b"image"]
    assert image_entry

    parsed_metadata = json.loads(image_entry.decode("utf8"))
    assert "bands" in parsed_metadata
    assert parsed_metadata["bands"] == metadata

View File

@ -55,6 +55,77 @@ ReleaseExportedSchema(struct ArrowSchema *array) {
// Mark array released
array->release = NULL;
}
/*
 * Build a JSON document describing the four band names of an image, in the
 * form {"bands": ["a", "b", "c", "d"]}, reading im->band_names[0..3].
 *
 * Returns a newly allocated, NUL-terminated string that the caller must
 * free(), or NULL if allocation fails or the text could not be formatted
 * completely.
 */
char *
image_band_json(Imaging im) {
    const char *format = "{\"bands\": [\"%s\", \"%s\", \"%s\", \"%s\"]}";
    char *json;
    // Bands can be 4 bands * 2 characters each. The four "%s" placeholders
    // are themselves replaced, so this budget leaves some headroom.
    size_t len = strlen(format) + 8 + 1;
    int err;

    json = calloc(1, len);
    if (!json) {
        return NULL;
    }

    err = PyOS_snprintf(
        json,
        len,
        format,
        im->band_names[0],
        im->band_names[1],
        im->band_names[2],
        im->band_names[3]
    );
    // A negative result is a formatting error; a result >= len means the
    // output was truncated and is no longer valid JSON. Either way, release
    // the buffer instead of leaking it or handing back a partial document.
    if (err < 0 || (size_t)err >= len) {
        free(json);
        return NULL;
    }
    return json;
}
char *
assemble_metadata(const char *band_json) {
/* format is
int32: number of key/value pairs (noted N below)
int32: byte length of key 0
key 0 (not null-terminated)
int32: byte length of value 0
value 0 (not null-terminated)
...
int32: byte length of key N - 1
key N - 1 (not null-terminated)
int32: byte length of value N - 1
value N - 1 (not null-terminated)
*/
const char *key = "image";
INT32 key_len = strlen(key);
INT32 band_json_len = strlen(band_json);
char *buf;
INT32 *dest_int;
char *dest;
buf = calloc(1, key_len + band_json_len + 4 + 1 * 8);
if (!buf) {
return NULL;
}
dest_int = (void *)buf;
dest_int[0] = 1;
dest_int[1] = key_len;
dest_int += 2;
dest = (void *)dest_int;
memcpy(dest, key, key_len);
dest += key_len;
dest_int = (void *)dest;
dest_int[0] = band_json_len;
dest_int += 1;
memcpy(dest_int, band_json, band_json_len);
return buf;
}
int
export_named_type(struct ArrowSchema *schema, char *format, char *name) {
@ -95,6 +166,8 @@ export_named_type(struct ArrowSchema *schema, char *format, char *name) {
int
export_imaging_schema(Imaging im, struct ArrowSchema *schema) {
int retval = 0;
char *metadata;
char *band_json;
if (strcmp(im->arrow_band_format, "") == 0) {
return IMAGING_ARROW_INCOMPATIBLE_MODE;
@ -117,13 +190,24 @@ export_imaging_schema(Imaging im, struct ArrowSchema *schema) {
schema->n_children = 1;
schema->children = calloc(1, sizeof(struct ArrowSchema *));
schema->children[0] = (struct ArrowSchema *)calloc(1, sizeof(struct ArrowSchema));
retval = export_named_type(schema->children[0], im->arrow_band_format, "pixel");
retval = export_named_type(schema->children[0], im->arrow_band_format, im->mode);
if (retval != 0) {
free(schema->children[0]);
free(schema->children);
schema->release(schema);
return retval;
}
// band related metadata
band_json = image_band_json(im);
if (band_json) {
// Attach the metadata to the child schema, where pyarrow exposes it as
// pa.array(img).type.field(0).metadata. Metadata attached to the
// top-level schema is not accessible from pyarrow.
schema->children[0]->metadata = assemble_metadata(band_json);
free(band_json);
}
return 0;
}