Merge pull request #369 from wiredfool/tifftags

Save arbitrary tags in Tiff image files
This commit is contained in:
Alex Clark ☺ 2013-11-30 12:43:45 -08:00
commit 2c281dab88
6 changed files with 242 additions and 19 deletions

View File

@ -262,7 +262,7 @@ def getiptcinfo(im):
# get raw data from the IPTC/NAA tag (PhotoShop tags the data # get raw data from the IPTC/NAA tag (PhotoShop tags the data
# as 4-byte integers, so we cannot use the get method...) # as 4-byte integers, so we cannot use the get method...)
try: try:
type, data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK] data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
except (AttributeError, KeyError): except (AttributeError, KeyError):
pass pass

View File

@ -54,6 +54,8 @@ import collections
import itertools import itertools
import os import os
II = b"II" # little-endian (intel-style) II = b"II" # little-endian (intel-style)
MM = b"MM" # big-endian (motorola-style) MM = b"MM" # big-endian (motorola-style)
@ -215,11 +217,45 @@ def _accept(prefix):
# Wrapper for TIFF IFDs. # Wrapper for TIFF IFDs.
class ImageFileDirectory(collections.MutableMapping): class ImageFileDirectory(collections.MutableMapping):
""" This class represents a TIFF tag directory. To speed things
up, we don't decode tags unless they're asked for.
# represents a TIFF tag directory. to speed things up, Exposes a dictionary interface of the tags in the directory
# we don't decode tags unless they're asked for. ImageFileDirectory[key] = value
value = ImageFileDirectory[key]
def __init__(self, prefix): Also contains a dictionary of tag types as read from the tiff
image file, 'ImageFileDirectory.tagtype'
Data Structures:
'public'
* self.tagtype = {} Key: numerical tiff tag number
Value: integer corresponding to the data type from
`TiffTags.TYPES`
'internal'
* self.tags = {} Key: numerical tiff tag number
Value: Decoded data, Generally a tuple.
* If set from __setitem__ -- always a tuple
* Numeric types -- always a tuple
* String type -- not a tuple, returned as string
* Undefined data -- not a tuple, returned as bytes
* Byte -- not a tuple, returned as byte.
* self.tagdata = {} Key: numerical tiff tag number
Value: undecoded byte string from file
Tags will be found in either self.tags or self.tagdata, but
not both. The union of the two should contain all the tags
from the Tiff image file. External classes shouldn't
reference these unless they're really sure what they're doing.
"""
def __init__(self, prefix=II):
"""
:prefix: 'II'|'MM' tiff endianness
"""
self.prefix = prefix[:2] self.prefix = prefix[:2]
if self.prefix == MM: if self.prefix == MM:
self.i16, self.i32 = ib16, ib32 self.i16, self.i32 = ib16, ib32
@ -265,7 +301,8 @@ class ImageFileDirectory(collections.MutableMapping):
try: try:
return self.tags[tag] return self.tags[tag]
except KeyError: except KeyError:
type, data = self.tagdata[tag] # unpack on the fly data = self.tagdata[tag] # unpack on the fly
type = self.tagtype[tag]
size, handler = self.load_dispatch[type] size, handler = self.load_dispatch[type]
self.tags[tag] = data = handler(self, data) self.tags[tag] = data = handler(self, data)
del self.tagdata[tag] del self.tagdata[tag]
@ -294,6 +331,9 @@ class ImageFileDirectory(collections.MutableMapping):
return tag in self return tag in self
def __setitem__(self, tag, value): def __setitem__(self, tag, value):
# tags are tuples for integers
# tags are not tuples for byte, string, and undefined data.
# see load_*
if not isinstance(value, tuple): if not isinstance(value, tuple):
value = (value,) value = (value,)
self.tags[tag] = value self.tags[tag] = value
@ -408,7 +448,7 @@ class ImageFileDirectory(collections.MutableMapping):
warnings.warn("Possibly corrupt EXIF data. Expecting to read %d bytes but only got %d. Skipping tag %s" % (size, len(data), tag)) warnings.warn("Possibly corrupt EXIF data. Expecting to read %d bytes but only got %d. Skipping tag %s" % (size, len(data), tag))
continue continue
self.tagdata[tag] = typ, data self.tagdata[tag] = data
self.tagtype[tag] = typ self.tagtype[tag] = typ
if Image.DEBUG: if Image.DEBUG:
@ -445,25 +485,42 @@ class ImageFileDirectory(collections.MutableMapping):
if tag in self.tagtype: if tag in self.tagtype:
typ = self.tagtype[tag] typ = self.tagtype[tag]
if Image.DEBUG:
print ("Tag %s, Type: %s, Value: %s" % (tag, typ, value))
if typ == 1: if typ == 1:
# byte data # byte data
data = value if isinstance(value, tuple):
data = value = value[-1]
else:
data = value
elif typ == 7: elif typ == 7:
# untyped data # untyped data
data = value = b"".join(value) data = value = b"".join(value)
elif isinstance(value[0], str): elif isStringType(value[0]):
# string data # string data
if isinstance(value, tuple):
value = value[-1]
typ = 2 typ = 2
data = value = b"\0".join(value.encode('ascii', 'replace')) + b"\0" # was b'\0'.join(str), which led to \x00a\x00b sorts
# of strings which I don't see in in-the-wild tiffs
# and doesn't match the tiff spec: 8-bit byte that
# contains a 7-bit ASCII code; the last byte must be
# NUL (binary zero). Also, I don't think this was well
# exercised before.
data = value = b"" + value.encode('ascii', 'replace') + b"\0"
else: else:
# integer data # integer data
if tag == STRIPOFFSETS: if tag == STRIPOFFSETS:
stripoffsets = len(directory) stripoffsets = len(directory)
typ = 4 # to avoid catch-22 typ = 4 # to avoid catch-22
elif tag in (X_RESOLUTION, Y_RESOLUTION): elif tag in (X_RESOLUTION, Y_RESOLUTION) or typ==5:
# identify rational data fields # identify rational data fields
typ = 5 typ = 5
if isinstance(value[0], tuple):
# long name for flatten
value = tuple(itertools.chain.from_iterable(value))
elif not typ: elif not typ:
typ = 3 typ = 3
for v in value: for v in value:
@ -495,6 +552,7 @@ class ImageFileDirectory(collections.MutableMapping):
count = len(value) count = len(value)
if typ == 5: if typ == 5:
count = count // 2 # adjust for rational data field count = count // 2 # adjust for rational data field
append((tag, typ, count, o32(offset), data)) append((tag, typ, count, o32(offset), data))
offset = offset + len(data) offset = offset + len(data)
if offset & 1: if offset & 1:
@ -932,23 +990,34 @@ def _save(im, fp, filename):
ifd[IMAGEWIDTH] = im.size[0] ifd[IMAGEWIDTH] = im.size[0]
ifd[IMAGELENGTH] = im.size[1] ifd[IMAGELENGTH] = im.size[1]
# write any arbitrary tags passed in as an ImageFileDirectory
info = im.encoderinfo.get("tiffinfo",{})
if Image.DEBUG:
print ("Tiffinfo Keys: %s"% info.keys)
keys = list(info.keys())
for key in keys:
ifd[key] = info.get(key)
try:
ifd.tagtype[key] = info.tagtype[key]
except:
pass # might not be an IFD, Might not have populated type
# additions written by Greg Couch, gregc@cgl.ucsf.edu # additions written by Greg Couch, gregc@cgl.ucsf.edu
# inspired by image-sig posting from Kevin Cazabon, kcazabon@home.com # inspired by image-sig posting from Kevin Cazabon, kcazabon@home.com
if hasattr(im, 'tag'): if hasattr(im, 'tag'):
# preserve tags from original TIFF image file # preserve tags from original TIFF image file
for key in (RESOLUTION_UNIT, X_RESOLUTION, Y_RESOLUTION): for key in (RESOLUTION_UNIT, X_RESOLUTION, Y_RESOLUTION,
if key in im.tag.tagdata: IPTC_NAA_CHUNK, PHOTOSHOP_CHUNK, XMP):
ifd[key] = im.tag.tagdata.get(key)
# preserve some more tags from original TIFF image file
# -- 2008-06-06 Florian Hoech
ifd.tagtype = im.tag.tagtype
for key in (IPTC_NAA_CHUNK, PHOTOSHOP_CHUNK, XMP):
if key in im.tag: if key in im.tag:
ifd[key] = im.tag[key] ifd[key] = im.tag[key]
ifd.tagtype[key] = im.tag.tagtype.get(key, None)
# preserve ICC profile (should also work when saving other formats # preserve ICC profile (should also work when saving other formats
# which support profiles as TIFF) -- 2008-06-06 Florian Hoech # which support profiles as TIFF) -- 2008-06-06 Florian Hoech
if "icc_profile" in im.info: if "icc_profile" in im.info:
ifd[ICCPROFILE] = im.info["icc_profile"] ifd[ICCPROFILE] = im.info["icc_profile"]
if "description" in im.encoderinfo: if "description" in im.encoderinfo:
ifd[IMAGEDESCRIPTION] = im.encoderinfo["description"] ifd[IMAGEDESCRIPTION] = im.encoderinfo["description"]
if "resolution" in im.encoderinfo: if "resolution" in im.encoderinfo:

BIN
Tests/images/lena.tif Normal file

Binary file not shown.

View File

@ -107,6 +107,29 @@ def test_adobe_deflate_tiff():
assert_equal(im.tile[0][:3], ('tiff_adobe_deflate', (0, 0, 278, 374), 0)) assert_equal(im.tile[0][:3], ('tiff_adobe_deflate', (0, 0, 278, 374), 0))
assert_no_exception(lambda: im.load()) assert_no_exception(lambda: im.load())
def test_write_metadata():
    """ Round-trip the tag directory of lena_g4.tif through save and reopen.

    The image's own tag directory is handed to save() as `tiffinfo`; the
    file is then reopened and the named tags are compared in both
    directions, so tags that were dropped and tags that appeared are both
    reported.
    """
    # NOTE(review): this test is meant to exercise the libtiff writer, but
    # nothing in this function forces that code path -- confirm it is
    # selected by the source image.
    source = Image.open('Tests/images/lena_g4.tif')
    target = tempfile('temp.tiff')
    source.save(target, tiffinfo=source.tag)
    reopened = Image.open(target)

    original = source.tag.named()
    reloaded = reopened.tag.named()

    # PhotometricInterpretation is set from SAVE_INFO, not the original image.
    skipped = ('StripByteCounts', 'RowsPerStrip', 'PageNumber',
               'PhotometricInterpretation')

    # Everything read back must match what the original carried ...
    for name, read_back in reloaded.items():
        if name in skipped:
            continue
        assert_equal(original[name], read_back, "%s didn't roundtrip" % name)

    # ... and everything the original carried must have been read back.
    for name, written in original.items():
        if name in skipped:
            continue
        assert_equal(written, reloaded[name], "%s didn't roundtrip" % name)
def test_g3_compression(): def test_g3_compression():
i = Image.open('Tests/images/lena_g4_500.tif') i = Image.open('Tests/images/lena_g4_500.tif')
@ -116,7 +139,7 @@ def test_g3_compression():
reread = Image.open(out) reread = Image.open(out)
assert_equal(reread.info['compression'], 'group3') assert_equal(reread.info['compression'], 'group3')
assert_image_equal(reread, i) assert_image_equal(reread, i)
def test_little_endian(): def test_little_endian():
im = Image.open('Tests/images/12bit.deflate.tif') im = Image.open('Tests/images/12bit.deflate.tif')
assert_equal(im.getpixel((0,0)), 480) assert_equal(im.getpixel((0,0)), 480)

View File

@ -0,0 +1,80 @@
from tester import *
from PIL import Image, TiffImagePlugin, TiffTags
# Reverse lookup for TiffTags.TAGS: tag name -> numerical tag id.
tag_ids = dict((name, number) for number, name in TiffTags.TAGS.items())
def test_rt_metadata():
    """ Test writing arbitrary metadata into the tiff image directory

    Use case is ImageJ private tags, one numeric, one arbitrary
    data. https://github.com/python-imaging/Pillow/issues/291

    The two tags are set on a fresh ImageFileDirectory, passed to save()
    as `tiffinfo`, and read back from the reopened file.
    """
    payload = "This is some arbitrary metadata for a text field"

    img = lena()
    directory = TiffImagePlugin.ImageFileDirectory()
    # numeric tag: length of the text; text tag: the text itself
    directory[tag_ids['ImageJMetaDataByteCounts']] = len(payload)
    directory[tag_ids['ImageJMetaData']] = payload

    target = tempfile("temp.tif")
    img.save(target, tiffinfo=directory)

    reopened = Image.open(target)

    # the numeric tag comes back as a 1-tuple, the text tag as a plain string
    assert_equal(reopened.tag[50838], (len(payload),))
    assert_equal(reopened.tag[50839], payload)
def test_read_metadata():
    """ Check the named tags read from lena_g4.tif against known values """
    img = Image.open('Tests/images/lena_g4.tif')

    # Expected result of img.tag.named(): tuples for numeric tags,
    # plain strings for text tags.
    expected = {'YResolution': ((1207959552, 16777216),),
                'PlanarConfiguration': (1,),
                'BitsPerSample': (1,),
                'ImageLength': (128,),
                'Compression': (4,),
                'FillOrder': (1,),
                'DocumentName': 'lena.g4.tif',
                'RowsPerStrip': (128,),
                'ResolutionUnit': (1,),
                'PhotometricInterpretation': (0,),
                'PageNumber': (0, 1),
                'XResolution': ((1207959552, 16777216),),
                'ImageWidth': (128,),
                'Orientation': (1,),
                'StripByteCounts': (1796,),
                'SamplesPerPixel': (1,),
                'StripOffsets': (8,),
                'Software': 'ImageMagick 6.5.7-8 2012-08-17 Q16 http://www.imagemagick.org'}

    found = img.tag.named()

    # A single assert_equal on the two dicts is equivalent, but less
    # helpful in telling what's wrong; compare tag by tag, both ways.
    for name, actual in found.items():
        assert_equal(expected[name], actual)

    for name, wanted in expected.items():
        assert_equal(wanted, found[name])
def test_write_metadata():
    """ Test metadata writing through the python code

    The tag directory of lena.tif is handed to save() as `tiffinfo`; the
    file is reopened and the named tags are compared in both directions.
    """
    source = Image.open('Tests/images/lena.tif')
    target = tempfile('temp.tiff')
    source.save(target, tiffinfo=source.tag)
    reopened = Image.open(target)

    original = source.tag.named()
    reloaded = reopened.tag.named()

    # tags excluded from the round-trip comparison
    skipped = ('StripByteCounts', 'RowsPerStrip', 'PageNumber', 'StripOffsets')

    # Everything read back must match what the original carried ...
    for name, read_back in reloaded.items():
        if name in skipped:
            continue
        assert_equal(original[name], read_back, "%s didn't roundtrip" % name)

    # ... and everything the original carried must have been read back.
    for name, written in original.items():
        if name in skipped:
            continue
        assert_equal(written, reloaded[name], "%s didn't roundtrip" % name)

View File

@ -279,6 +279,57 @@ dictionary of decoded TIFF fields. Values are stored as either strings or
tuples. Note that only short, long and ASCII tags are correctly unpacked by tuples. Note that only short, long and ASCII tags are correctly unpacked by
this release. this release.
Saving Tiff Images
~~~~~~~~~~~~~~~~~~
The :py:meth:`~PIL.Image.Image.save` method can take the following keyword arguments:
**tiffinfo**
A :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory` object or dict
object containing tiff tags and values. The TIFF field type is
autodetected for Numeric and string values; any other types
require using an :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory`
object and setting the type in
:py:attr:`~PIL.TiffImagePlugin.ImageFileDirectory.tagtype` with
the appropriate numerical value from
``TiffTags.TYPES``.
.. versionadded:: 2.3.0
**compression**
A string containing the desired compression method for the
file. (valid only with libtiff installed) Valid compression
methods are: ``[None, "tiff_ccitt", "group3", "group4",
"tiff_jpeg", "tiff_adobe_deflate", "tiff_thunderscan",
"tiff_deflate", "tiff_sgilog", "tiff_sgilog24", "tiff_raw_16"]``
These arguments to set the tiff header fields are an alternative to using the general tags available through tiffinfo.
**description**
**software**
**date time**
**artist**
**copyright**
Strings
**resolution unit**
A string of "inch", "centimeter" or "cm"
**resolution**
**x resolution**
**y resolution**
**dpi**
Either a Float, an Integer, or a 2-tuple of (numerator,
denominator). Resolution implies an equal x and y resolution; dpi
also implies a unit of inches.
WebP WebP
^^^^ ^^^^