Pillow/src/PIL/IptcImagePlugin.py

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

250 lines
6.5 KiB
Python
Raw Normal View History

2010-07-31 06:52:47 +04:00
#
# The Python Imaging Library.
# $Id$
#
# IPTC/NAA file handling
#
# history:
# 1995-10-01 fl Created
# 1998-03-09 fl Cleaned up and added to PIL
# 2002-06-18 fl Added getiptcinfo helper
#
# Copyright (c) Secret Labs AB 1997-2002.
# Copyright (c) Fredrik Lundh 1995.
#
# See the README file for information on usage and redistribution.
#
from __future__ import annotations
from collections.abc import Sequence
2023-12-31 15:47:37 +03:00
from io import BytesIO
2024-07-15 12:23:36 +03:00
from typing import cast
2010-07-31 06:52:47 +04:00
from . import Image, ImageFile
from ._binary import i16be as i16
from ._binary import i32be as i32
from ._deprecate import deprecate
2010-07-31 06:52:47 +04:00
# Maps the integer stored under tag (3, 120) to the compression name that is
# placed in the image tile and checked again when the image data is loaded.
COMPRESSION = {1: "raw", 5: "jpeg"}
2023-12-31 15:21:56 +03:00
def __getattr__(name: str) -> bytes:
    """Module-level attribute hook that serves the deprecated ``PAD`` value.

    :param name: The attribute that was looked up on this module.
    :returns: Four null bytes when ``name`` is ``"PAD"``.
    :raises AttributeError: For every other name.
    """
    if name != "PAD":
        msg = f"module '{__name__}' has no attribute '{name}'"
        raise AttributeError(msg)

    # Emit the deprecation notice before handing the value back.
    deprecate("IptcImagePlugin.PAD", 12)
    return b"\0\0\0\0"
2010-07-31 06:52:47 +04:00
2014-07-30 20:39:11 +04:00
2010-07-31 06:52:47 +04:00
#
# Helpers
2019-03-21 16:28:20 +03:00
2023-12-31 15:21:56 +03:00
def _i(c: bytes) -> int:
    """Convert the trailing (at most four) bytes of ``c`` to an integer.

    The value is left-padded with null bytes so that inputs shorter than
    four bytes can still be handed to the 32-bit reader ``i32``.
    """
    padded = b"\0\0\0\0" + c
    return i32(padded[-4:])
2023-12-30 01:18:08 +03:00
def _i8(c: int | bytes) -> int:
    """Return ``c`` unchanged if it is an int, otherwise its first element."""
    if isinstance(c, int):
        return c
    return c[0]
2023-12-31 15:21:56 +03:00
def i(c: bytes) -> int:
    """Deprecated public wrapper around ``_i``.

    .. deprecated:: 10.2.0
    """
    deprecate("IptcImagePlugin.i", 12)
    value = _i(c)
    return value
2010-07-31 06:52:47 +04:00
2014-07-30 20:39:11 +04:00
2023-12-31 15:21:56 +03:00
def dump(c: Sequence[int | bytes]) -> None:
    """Print every element of ``c`` as (at least) two hex digits on one line.

    Each value is followed by a space; the line is ended with a newline.

    .. deprecated:: 10.2.0
    """
    deprecate("IptcImagePlugin.dump", 12)
    for element in c:
        hex_text = f"{_i8(element):02x}"
        print(hex_text, end=" ")
    print()
2010-07-31 06:52:47 +04:00
2014-07-30 20:39:11 +04:00
2010-07-31 06:52:47 +04:00
##
# Image plugin for IPTC/NAA datastreams. To read IPTC/NAA fields
# from TIFF and JPEG files, use the <b>getiptcinfo</b> function.
2019-03-21 16:28:20 +03:00
2010-07-31 06:52:47 +04:00
class IptcImageFile(ImageFile.ImageFile):
    """File handler for IPTC/NAA datastreams.

    Field headers are parsed into ``info`` keyed by their two-byte tag, and
    embedded image data is exposed through a single ``"iptc"`` tile.
    """

    format = "IPTC"
    format_description = "IPTC/NAA"

    def getint(self, key: tuple[int, int]) -> int:
        """Return the ``info`` entry stored under ``key`` decoded by ``_i``."""
        raw = self.info[key]
        return _i(raw)

    def field(self) -> tuple[tuple[int, int] | None, int]:
        """Read the next IPTC field header from ``self.fp``.

        :returns: ``(tag, size)``, where ``tag`` is built from the second and
            third header bytes and ``size`` is the length of the data that
            follows. ``(None, 0)`` is returned when the header is empty or
            holds only null bytes.
        :raises SyntaxError: If the first byte is not ``0x1C`` or the first
            tag byte is not an accepted value.
        :raises OSError: If the length byte is larger than 132.
        """
        header = self.fp.read(5)
        if not header.strip(b"\x00"):
            return None, 0

        tag = header[1], header[2]

        # syntax
        allowed = (1, 2, 3, 4, 5, 6, 7, 8, 9, 240)
        if header[0] != 0x1C or tag[0] not in allowed:
            msg = "invalid IPTC/NAA file"
            raise SyntaxError(msg)

        # field size
        length_byte = header[3]
        if length_byte > 132:
            msg = "illegal field length in IPTC/NAA file"
            raise OSError(msg)
        if length_byte == 128:
            return tag, 0
        if length_byte > 128:
            # The size itself is stored in the next (length_byte - 128) bytes.
            return tag, _i(self.fp.read(length_byte - 128))
        # Otherwise the size is the 16-bit value at header offset 3.
        return tag, i16(header, 3)

    def _open(self) -> None:
        """Parse the descriptive fields and set mode, size and tile.

        :raises OSError: If the compression entry is missing from ``info`` or
            is not a key of ``COMPRESSION``.
        """
        # load descriptive fields
        while True:
            # Remember where this header starts; if it turns out to be the
            # first (8, 10) field, this is where the tile data begins.
            offset = self.fp.tell()
            tag, size = self.field()
            if not tag or tag == (8, 10):
                break
            tagdata = self.fp.read(size) if size else None
            if tag not in self.info:
                self.info[tag] = tagdata
            elif isinstance(self.info[tag], list):
                self.info[tag].append(tagdata)
            else:
                # Second occurrence of a tag: collect the values in a list.
                self.info[tag] = [self.info[tag], tagdata]

        # mode
        layers = self.info[(3, 60)][0]
        component = self.info[(3, 60)][1]
        band = self.info[(3, 65)][0] - 1 if (3, 65) in self.info else 0
        if component:
            if layers == 3:
                self._mode = "RGB"[band]
            elif layers == 4:
                self._mode = "CMYK"[band]
        elif layers == 1:
            self._mode = "L"

        # size
        width = self.getint((3, 20))
        height = self.getint((3, 30))
        self._size = width, height

        # compression
        try:
            compression = COMPRESSION[self.getint((3, 120))]
        except KeyError as err:
            msg = "Unknown IPTC image compression"
            raise OSError(msg) from err

        # tile
        if tag == (8, 10):
            extents = (0, 0, *self.size)
            self.tile = [ImageFile._Tile("iptc", extents, offset, compression)]

    def load(self) -> Image.core.PixelAccess | None:
        """Load the pixel data of the image.

        Unless there is exactly one ``"iptc"`` tile, loading is delegated to
        ``ImageFile.ImageFile.load``. Otherwise the data of the consecutive
        (8, 10) fields is copied into memory, opened with ``Image.open`` and
        the resulting core image is adopted as ``self.im``.
        """
        has_single_iptc_tile = len(self.tile) == 1 and self.tile[0][0] == "iptc"
        if not has_single_iptc_tile:
            return ImageFile.ImageFile.load(self)

        offset, compression = self.tile[0][2:]

        self.fp.seek(offset)

        # Copy image data to an in-memory buffer
        buffer = BytesIO()
        if compression == "raw":
            # To simplify access to the extracted data,
            # prepend a "P5" header
            buffer.write(b"P5\n%d %d\n255\n" % self.size)
        while True:
            field_tag, remaining = self.field()
            if field_tag != (8, 10):
                break
            while remaining > 0:
                chunk = self.fp.read(min(remaining, 8192))
                if not chunk:
                    # Data ended before the announced size was reached.
                    break
                buffer.write(chunk)
                remaining -= len(chunk)

        with Image.open(buffer) as _im:
            _im.load()
            self.im = _im.im
        return None
2010-07-31 06:52:47 +04:00
# Register the IPTC file handler and its ".iim" extension with the Image module.
Image.register_open(IptcImageFile.format, IptcImageFile)
Image.register_extension(IptcImageFile.format, ".iim")
2010-07-31 06:52:47 +04:00
2014-07-30 20:39:11 +04:00
2024-07-26 09:42:28 +03:00
def getiptcinfo(
    im: ImageFile.ImageFile,
) -> dict[tuple[int, int], bytes | list[bytes]] | None:
    """
    Get IPTC information from TIFF, JPEG, or IPTC file.

    :param im: An image containing IPTC data.
    :returns: A dictionary containing IPTC information, or None if
        no IPTC information block was found. Only ``info`` entries whose
        key is a tuple are included.
    """
    from . import JpegImagePlugin, TiffImagePlugin

    if isinstance(im, IptcImageFile):
        # Already parsed: return the tuple-keyed entries right away.
        return {k: v for k, v in im.info.items() if isinstance(k, tuple)}

    data = None
    if isinstance(im, JpegImagePlugin.JpegImageFile):
        # extract the IPTC/NAA resource
        photoshop = im.info.get("photoshop")
        if photoshop:
            data = photoshop.get(0x0404)
    elif isinstance(im, TiffImagePlugin.TiffImageFile):
        # get raw data from the IPTC/NAA tag (PhotoShop tags the data
        # as 4-byte integers, so we cannot use the get method...)
        try:
            data = im.tag_v2[TiffImagePlugin.IPTC_NAA_CHUNK]
        except KeyError:
            pass

    if data is None:
        return None  # no properties

    # Create an IptcImageFile object without running its initializer.
    class _Uninitialized:
        pass

    placeholder = _Uninitialized()
    placeholder.__class__ = IptcImageFile  # type: ignore[assignment]
    iptc_im = cast(IptcImageFile, placeholder)

    # Parse the IPTC information chunk from memory.
    iptc_im.info = {}
    iptc_im.fp = BytesIO(data)

    try:
        iptc_im._open()
    except (IndexError, KeyError):
        pass  # expected failure

    # Whatever fields were collected before parsing stopped are returned.
    return {k: v for k, v in iptc_im.info.items() if isinstance(k, tuple)}