Merge pull request #369 from wiredfool/tifftags

Save arbitrary tags in Tiff image files
2025-12-22 17:43:18 +03:00 · 2013-11-30 12:43:45 -08:00 · 2013-11-30 12:43:45 -08:00 · 2c281dab88
commit 2c281dab88
parent 0365344fe6 543f96de61
6 changed files with 242 additions and 19 deletions
--- a/PIL/IptcImagePlugin.py
+++ b/PIL/IptcImagePlugin.py
@ -262,7 +262,7 @@ def getiptcinfo(im):
        # get raw data from the IPTC/NAA tag (PhotoShop tags the data
        # as 4-byte integers, so we cannot use the get method...)
        try:
-            type, data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
+            data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
        except (AttributeError, KeyError):
            pass
--- a/PIL/TiffImagePlugin.py
+++ b/PIL/TiffImagePlugin.py
@ -54,6 +54,8 @@ import collections
 import itertools
 import os
 II = b"II" # little-endian (intel-style)
 MM = b"MM" # big-endian (motorola-style)
@ -215,11 +217,45 @@ def _accept(prefix):
 # Wrapper for TIFF IFDs.
 class ImageFileDirectory(collections.MutableMapping):
    """ This class represents a TIFF tag directory.  To speed things
        up, we don't decode tags unless they're asked for.
-    # represents a TIFF tag directory.  to speed things up,
+        Exposes a dictionary interface of the tags in the directory
-    # we don't decode tags unless they're asked for.
+        ImageFileDirectory[key] = value
        value = ImageFileDirectory[key]
-    def __init__(self, prefix):
+        Also contains a dictionary of tag types as read from the tiff
        image file, 'ImageFileDirectory.tagtype'
        Data Structures:
        'public'
        * self.tagtype = {} Key: numerical tiff tag number
                            Value: integer corresponding to the data type from
                            `TiffTags.TYPES`
        'internal'            
        * self.tags = {}  Key: numerical tiff tag number
                          Value: Decoded data, Generally a tuple.
                            * If set from __setitem__ -- always a tuple
                            * Numeric types -- always a tuple
                            * String type -- not a tuple, returned as string
                            * Undefined data -- not a tuple, returned as bytes
                            * Byte -- not a tuple, returned as byte.
        * self.tagdata = {} Key: numerical tiff tag number
                            Value: undecoded byte string from file
        Tags will be found in either self.tags or self.tagdata, but
        not both. The union of the two should contain all the tags
        from the Tiff image file.  External classes shouldn't
        reference these unless they're really sure what they're doing.
        """
    def __init__(self, prefix=II):
        """
        :prefix: 'II'|'MM'  tiff endianness
        """
        self.prefix = prefix[:2]
        if self.prefix == MM:
            self.i16, self.i32 = ib16, ib32
@ -265,7 +301,8 @@ class ImageFileDirectory(collections.MutableMapping):
        try:
            return self.tags[tag]
        except KeyError:
-            type, data = self.tagdata[tag] # unpack on the fly
+            data = self.tagdata[tag] # unpack on the fly
            type = self.tagtype[tag]
            size, handler = self.load_dispatch[type]
            self.tags[tag] = data = handler(self, data)
            del self.tagdata[tag]
@ -294,6 +331,9 @@ class ImageFileDirectory(collections.MutableMapping):
            return tag in self
    def __setitem__(self, tag, value):
        # tags are tuples for integers
        # tags are not tuples for byte, string, and undefined data.
        # see load_*
        if not isinstance(value, tuple):
            value = (value,)
        self.tags[tag] = value
@ -408,7 +448,7 @@ class ImageFileDirectory(collections.MutableMapping):
                warnings.warn("Possibly corrupt EXIF data.  Expecting to read %d bytes but only got %d. Skipping tag %s" % (size, len(data), tag))
                continue
-            self.tagdata[tag] = typ, data
+            self.tagdata[tag] = data
            self.tagtype[tag] = typ
            if Image.DEBUG:
@ -445,25 +485,42 @@ class ImageFileDirectory(collections.MutableMapping):
            if tag in self.tagtype:
                typ = self.tagtype[tag]
-
+                
            if Image.DEBUG:
                print ("Tag %s, Type: %s, Value: %s" % (tag, typ, value))
            if typ == 1:
                # byte data
-                data = value
+                if isinstance(value, tuple):
                    data = value = value[-1]
                else:
                    data = value
            elif typ == 7:
                # untyped data
                data = value = b"".join(value)
-            elif isinstance(value[0], str):
+            elif isStringType(value[0]):
                # string data
                if isinstance(value, tuple):
                    value = value[-1]
                typ = 2
-                data = value = b"\0".join(value.encode('ascii', 'replace')) + b"\0"
+                # was b'\0'.join(str), which led to \x00a\x00b sorts
                # of strings which I don't see in tiffs in the wild
                # and doesn't match the tiff spec: 8-bit byte that
                # contains a 7-bit ASCII code; the last byte must be
                # NUL (binary zero). Also, I don't think this was well
                # exercised before.
                data = value = b"" + value.encode('ascii', 'replace') + b"\0"
            else:
                # integer data
                if tag == STRIPOFFSETS:
                    stripoffsets = len(directory)
                    typ = 4 # to avoid catch-22
-                elif tag in (X_RESOLUTION, Y_RESOLUTION):
+                elif tag in (X_RESOLUTION, Y_RESOLUTION) or typ==5:
                    # identify rational data fields
                    typ = 5
                    if isinstance(value[0], tuple):
                        # long name for flatten
                        value = tuple(itertools.chain.from_iterable(value))
                elif not typ:
                    typ = 3
                    for v in value:
@ -495,6 +552,7 @@ class ImageFileDirectory(collections.MutableMapping):
                count = len(value)
                if typ == 5:
                    count = count // 2        # adjust for rational data field
                append((tag, typ, count, o32(offset), data))
                offset = offset + len(data)
                if offset & 1:
@ -932,23 +990,34 @@ def _save(im, fp, filename):
    ifd[IMAGEWIDTH] = im.size[0]
    ifd[IMAGELENGTH] = im.size[1]
    # write any arbitrary tags passed in as an ImageFileDirectory
    info = im.encoderinfo.get("tiffinfo",{})
    if Image.DEBUG:
        print ("Tiffinfo Keys: %s"% info.keys)
    keys = list(info.keys())
    for key in keys:
        ifd[key] = info.get(key)
        try:
            ifd.tagtype[key] = info.tagtype[key]
        except:
            pass # might not be an IFD, Might not have populated type
    # additions written by Greg Couch, gregc@cgl.ucsf.edu
    # inspired by image-sig posting from Kevin Cazabon, kcazabon@home.com
    if hasattr(im, 'tag'):
        # preserve tags from original TIFF image file
-        for key in (RESOLUTION_UNIT, X_RESOLUTION, Y_RESOLUTION):
+        for key in (RESOLUTION_UNIT, X_RESOLUTION, Y_RESOLUTION,
-            if key in im.tag.tagdata:
+                    IPTC_NAA_CHUNK, PHOTOSHOP_CHUNK, XMP):
                ifd[key] = im.tag.tagdata.get(key)
        # preserve some more tags from original TIFF image file
        # -- 2008-06-06 Florian Hoech
        ifd.tagtype = im.tag.tagtype
        for key in (IPTC_NAA_CHUNK, PHOTOSHOP_CHUNK, XMP):
            if key in im.tag:
                ifd[key] = im.tag[key]
            ifd.tagtype[key] = im.tag.tagtype.get(key, None)
        # preserve ICC profile (should also work when saving other formats
        # which support profiles as TIFF) -- 2008-06-06 Florian Hoech
        if "icc_profile" in im.info:
            ifd[ICCPROFILE] = im.info["icc_profile"]
    if "description" in im.encoderinfo:
        ifd[IMAGEDESCRIPTION] = im.encoderinfo["description"]
    if "resolution" in im.encoderinfo:
--- a/Tests/images/lena.tif
+++ b/Tests/images/lena.tif
--- a/Tests/test_file_libtiff.py
+++ b/Tests/test_file_libtiff.py
@ -107,6 +107,29 @@ def test_adobe_deflate_tiff():
    assert_equal(im.tile[0][:3], ('tiff_adobe_deflate', (0, 0, 278, 374), 0))
    assert_no_exception(lambda: im.load())
 def test_write_metadata():
    """ Test metadata writing through libtiff

    Saves 'Tests/images/lena_g4.tif' with its own tag directory passed
    as ``tiffinfo``, reopens the result, and checks that every named
    tag outside the skip list matches in both directions.
    """
    source = Image.open('Tests/images/lena_g4.tif')
    out_path = tempfile('temp.tiff')
    source.save(out_path, tiffinfo=source.tag)
    written = Image.open(out_path)

    before = source.tag.named()
    after = written.tag.named()

    # PhotometricInterpretation is set from SAVE_INFO, not the original image.
    skipped = ('StripByteCounts', 'RowsPerStrip', 'PageNumber',
               'PhotometricInterpretation')

    # Every tag in the rewritten file must agree with the source ...
    for name, written_value in after.items():
        if name in skipped:
            continue
        assert_equal(before[name], written_value, "%s didn't roundtrip" % name)

    # ... and every tag of the source must be present in the rewritten file.
    for name, source_value in before.items():
        if name in skipped:
            continue
        assert_equal(source_value, after[name], "%s didn't roundtrip" % name)
 def test_g3_compression():
    i = Image.open('Tests/images/lena_g4_500.tif')
@ -116,7 +139,7 @@ def test_g3_compression():
    reread = Image.open(out)
    assert_equal(reread.info['compression'], 'group3')
    assert_image_equal(reread, i)
-    
+
 def test_little_endian():
    im = Image.open('Tests/images/12bit.deflate.tif')
    assert_equal(im.getpixel((0,0)), 480)
--- a/Tests/test_file_tiff_metadata.py
+++ b/Tests/test_file_tiff_metadata.py
@ -0,0 +1,80 @@
 from tester import *
 from PIL import Image, TiffImagePlugin, TiffTags
 # Reverse lookup built from TiffTags.TAGS: tag name -> numeric tag id.
 tag_ids = dict((name, number) for number, name in TiffTags.TAGS.items())
 def test_rt_metadata():
    """ Test writing arbitrary metadata into the tiff image directory

    Use case is ImageJ private tags, one numeric and one holding
    arbitrary data.  https://github.com/python-imaging/Pillow/issues/291
    """
    picture = lena()
    payload = "This is some arbitrary metadata for a text field"
    payload_size = len(payload)

    # Populate a fresh directory with the two private tags, keyed by name.
    directory = TiffImagePlugin.ImageFileDirectory()
    directory[tag_ids['ImageJMetaDataByteCounts']] = payload_size
    directory[tag_ids['ImageJMetaData']] = payload

    out_path = tempfile("temp.tif")
    picture.save(out_path, tiffinfo=directory)

    # The reloaded tags are looked up by their literal numeric ids.
    reread = Image.open(out_path)
    assert_equal(reread.tag[50838], (payload_size,))
    assert_equal(reread.tag[50839], payload)
 def test_read_metadata():
    """ Check the named tags read from 'Tests/images/lena_g4.tif'
        against a table of expected values, in both directions.
        """
    image = Image.open('Tests/images/lena_g4.tif')

    expected = {
        'YResolution': ((1207959552, 16777216),),
        'PlanarConfiguration': (1,),
        'BitsPerSample': (1,),
        'ImageLength': (128,),
        'Compression': (4,),
        'FillOrder': (1,),
        'DocumentName': 'lena.g4.tif',
        'RowsPerStrip': (128,),
        'ResolutionUnit': (1,),
        'PhotometricInterpretation': (0,),
        'PageNumber': (0, 1),
        'XResolution': ((1207959552, 16777216),),
        'ImageWidth': (128,),
        'Orientation': (1,),
        'StripByteCounts': (1796,),
        'SamplesPerPixel': (1,),
        'StripOffsets': (8,),
        'Software': 'ImageMagick 6.5.7-8 2012-08-17 Q16 http://www.imagemagick.org',
    }

    # A single assert_equal on the whole mapping would be equivalent, but
    # less helpful in telling what's wrong, so compare tag by tag.
    actual = image.tag.named()

    # Nothing unexpected may be read from the file ...
    for name, read_value in actual.items():
        assert_equal(expected[name], read_value)

    # ... and every expected tag must have been read.
    for name, wanted_value in expected.items():
        assert_equal(wanted_value, actual[name])
 def test_write_metadata():
    """ Test metadata writing through the python code

    Saves 'Tests/images/lena.tif' with its own tag directory passed as
    ``tiffinfo``, reopens the result, and checks that every named tag
    outside the skip list matches in both directions.
    """
    source = Image.open('Tests/images/lena.tif')
    out_path = tempfile('temp.tiff')
    source.save(out_path, tiffinfo=source.tag)
    written = Image.open(out_path)

    before = source.tag.named()
    after = written.tag.named()

    # Tags excluded from the round-trip comparison.
    skipped = ('StripByteCounts', 'RowsPerStrip', 'PageNumber', 'StripOffsets')

    # Every tag in the rewritten file must agree with the source ...
    for name, written_value in after.items():
        if name in skipped:
            continue
        assert_equal(before[name], written_value, "%s didn't roundtrip" % name)

    # ... and every tag of the source must be present in the rewritten file.
    for name, source_value in before.items():
        if name in skipped:
            continue
        assert_equal(source_value, after[name], "%s didn't roundtrip" % name)
--- a/docs/handbook/image-file-formats.rst
+++ b/docs/handbook/image-file-formats.rst
@ -279,6 +279,57 @@ dictionary of decoded TIFF fields. Values are stored as either strings or
 tuples. Note that only short, long and ASCII tags are correctly unpacked by
 this release.
 Saving Tiff Images
 ~~~~~~~~~~~~~~~~~~
 The :py:meth:`~PIL.Image.Image.save` method can take the following keyword arguments:
 **tiffinfo** 
    A :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory` object or dict
    object containing tiff tags and values. The TIFF field type is
    autodetected for numeric and string values; any other type
    requires using an :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory`
    object and setting the type in
    :py:attr:`~PIL.TiffImagePlugin.ImageFileDirectory.tagtype` with
    the appropriate numerical value from
    ``TiffTags.TYPES``.
    .. versionadded:: 2.3.0
 **compression**
    A string containing the desired compression method for the
    file. (valid only with libtiff installed) Valid compression
    methods are: ``[None, "tiff_ccitt", "group3", "group4",
    "tiff_jpeg", "tiff_adobe_deflate", "tiff_thunderscan",
    "tiff_deflate", "tiff_sgilog", "tiff_sgilog24", "tiff_raw_16"]``
 These arguments to set the tiff header fields are an alternative to using the general tags available through tiffinfo.
 **description** 
 **software**
 **date time**
 **artist**
 **copyright**
    Strings
 **resolution unit**
    A string of "inch", "centimeter" or "cm" 
 **resolution**
 **x resolution**
 **y resolution**
 **dpi**
    Either a float, an integer, or a 2-tuple of (numerator,
    denominator). ``resolution`` implies an equal x and y resolution;
    ``dpi`` also implies a unit of inches.
 WebP
 ^^^^