Merge pull request #369 from wiredfool/tifftags

Save arbitrary tags in Tiff image files
2025-08-05 04:50:09 +03:00 · 2013-11-30 12:43:45 -08:00 · 2013-11-30 12:43:45 -08:00 · 2c281dab88
commit 2c281dab88
parent 0365344fe6 543f96de61
6 changed files with 242 additions and 19 deletions
--- a/PIL/IptcImagePlugin.py
+++ b/PIL/IptcImagePlugin.py
@ -262,7 +262,7 @@ def getiptcinfo(im):
        # get raw data from the IPTC/NAA tag (PhotoShop tags the data
        # as 4-byte integers, so we cannot use the get method...)
        try:
-            type, data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
+            data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
        except (AttributeError, KeyError):
            pass

--- a/PIL/TiffImagePlugin.py
+++ b/PIL/TiffImagePlugin.py
@ -54,6 +54,8 @@ import collections
 import itertools
 import os

+
+
 II = b"II" # little-endian (intel-style)
 MM = b"MM" # big-endian (motorola-style)

@ -215,11 +217,45 @@ def _accept(prefix):
 # Wrapper for TIFF IFDs.

 class ImageFileDirectory(collections.MutableMapping):
+    """ This class represents a TIFF tag directory.  To speed things
+        up, we don't decode tags unless they're asked for.

-    # represents a TIFF tag directory.  to speed things up,
-    # we don't decode tags unless they're asked for.
+        Exposes a dictionary interface of the tags in the directory
+        ImageFileDirectory[key] = value
+        value = ImageFileDirectory[key]

-    def __init__(self, prefix):
+        Also contains a dictionary of tag types as read from the tiff
+        image file, 'ImageFileDirectory.tagtype'
+
+
+        Data Structures:
+        'public'
+        * self.tagtype = {} Key: numerical tiff tag number
+                            Value: integer corresponding to the data type from
+                            `TiffTags.TYPES`
+
+        'internal'            
+        * self.tags = {}  Key: numerical tiff tag number
+                          Value: Decoded data, Generally a tuple.
+                            * If set from __setitem__ -- always a tuple
+                            * Numeric types -- always a tuple
+                            * String type -- not a tuple, returned as string
+                            * Undefined data -- not a tuple, returned as bytes
+                            * Byte -- not a tuple, returned as byte.
+        * self.tagdata = {} Key: numerical tiff tag number
+                            Value: undecoded byte string from file
+
+
+        Tags will be found in either self.tags or self.tagdata, but
+        not both. The union of the two should contain all the tags
+        from the Tiff image file.  External classes shouldn't
+        reference these unless they're really sure what they're doing.
+        """
+
+    def __init__(self, prefix=II):
+        """
+        :prefix: 'II'|'MM'  tiff endianness
+        """
        self.prefix = prefix[:2]
        if self.prefix == MM:
            self.i16, self.i32 = ib16, ib32
@ -265,7 +301,8 @@ class ImageFileDirectory(collections.MutableMapping):
        try:
            return self.tags[tag]
        except KeyError:
-            type, data = self.tagdata[tag] # unpack on the fly
+            data = self.tagdata[tag] # unpack on the fly
+            type = self.tagtype[tag]
            size, handler = self.load_dispatch[type]
            self.tags[tag] = data = handler(self, data)
            del self.tagdata[tag]
@ -294,6 +331,9 @@ class ImageFileDirectory(collections.MutableMapping):
            return tag in self

    def __setitem__(self, tag, value):
+        # tags are tuples for integers
+        # tags are not tuples for byte, string, and undefined data.
+        # see load_*
        if not isinstance(value, tuple):
            value = (value,)
        self.tags[tag] = value
@ -408,7 +448,7 @@ class ImageFileDirectory(collections.MutableMapping):
                warnings.warn("Possibly corrupt EXIF data.  Expecting to read %d bytes but only got %d. Skipping tag %s" % (size, len(data), tag))
                continue

-            self.tagdata[tag] = typ, data
+            self.tagdata[tag] = data
            self.tagtype[tag] = typ

            if Image.DEBUG:
@ -445,25 +485,42 @@ class ImageFileDirectory(collections.MutableMapping):

            if tag in self.tagtype:
                typ = self.tagtype[tag]
-
+                
+            if Image.DEBUG:
+                print ("Tag %s, Type: %s, Value: %s" % (tag, typ, value))
+                   
            if typ == 1:
                # byte data
-                data = value
+                if isinstance(value, tuple):
+                    data = value = value[-1]
+                else:
+                    data = value
            elif typ == 7:
                # untyped data
                data = value = b"".join(value)
-            elif isinstance(value[0], str):
+            elif isStringType(value[0]):
                # string data
+                if isinstance(value, tuple):
+                    value = value[-1]
                typ = 2
-                data = value = b"\0".join(value.encode('ascii', 'replace')) + b"\0"
+                # was b'\0'.join(str), which led to \x00a\x00b sorts
+                # of strings, which I don't see in tiffs in the wild
+                # and which don't match the tiff spec: 8-bit byte that
+                # contains a 7-bit ASCII code; the last byte must be
+                # NUL (binary zero). Also, I don't think this was well
+                # exercised before.
+                data = value = b"" + value.encode('ascii', 'replace') + b"\0"
            else:
                # integer data
                if tag == STRIPOFFSETS:
                    stripoffsets = len(directory)
                    typ = 4 # to avoid catch-22
-                elif tag in (X_RESOLUTION, Y_RESOLUTION):
+                elif tag in (X_RESOLUTION, Y_RESOLUTION) or typ==5:
                    # identify rational data fields
                    typ = 5
+                    if isinstance(value[0], tuple):
+                        # long name for flatten
+                        value = tuple(itertools.chain.from_iterable(value))
                elif not typ:
                    typ = 3
                    for v in value:
@ -495,6 +552,7 @@ class ImageFileDirectory(collections.MutableMapping):
                count = len(value)
                if typ == 5:
                    count = count // 2        # adjust for rational data field
+
                append((tag, typ, count, o32(offset), data))
                offset = offset + len(data)
                if offset & 1:
@ -932,23 +990,34 @@ def _save(im, fp, filename):
    ifd[IMAGEWIDTH] = im.size[0]
    ifd[IMAGELENGTH] = im.size[1]

+    # write any arbitrary tags passed in as an ImageFileDirectory
+    info = im.encoderinfo.get("tiffinfo",{})
+    if Image.DEBUG:
+        print ("Tiffinfo Keys: %s"% info.keys)
+    keys = list(info.keys())
+    for key in keys:
+        ifd[key] = info.get(key)
+        try:
+            ifd.tagtype[key] = info.tagtype[key]
+        except:
+            pass # might not be an IFD, Might not have populated type
+
+
    # additions written by Greg Couch, gregc@cgl.ucsf.edu
    # inspired by image-sig posting from Kevin Cazabon, kcazabon@home.com
    if hasattr(im, 'tag'):
        # preserve tags from original TIFF image file
-        for key in (RESOLUTION_UNIT, X_RESOLUTION, Y_RESOLUTION):
-            if key in im.tag.tagdata:
-                ifd[key] = im.tag.tagdata.get(key)
-        # preserve some more tags from original TIFF image file
-        # -- 2008-06-06 Florian Hoech
-        ifd.tagtype = im.tag.tagtype
-        for key in (IPTC_NAA_CHUNK, PHOTOSHOP_CHUNK, XMP):
+        for key in (RESOLUTION_UNIT, X_RESOLUTION, Y_RESOLUTION,
+                    IPTC_NAA_CHUNK, PHOTOSHOP_CHUNK, XMP):
            if key in im.tag:
                ifd[key] = im.tag[key]
+            ifd.tagtype[key] = im.tag.tagtype.get(key, None)
+
        # preserve ICC profile (should also work when saving other formats
        # which support profiles as TIFF) -- 2008-06-06 Florian Hoech
        if "icc_profile" in im.info:
            ifd[ICCPROFILE] = im.info["icc_profile"]
+            
    if "description" in im.encoderinfo:
        ifd[IMAGEDESCRIPTION] = im.encoderinfo["description"]
    if "resolution" in im.encoderinfo:
--- a/Tests/images/lena.tif
+++ b/Tests/images/lena.tif
--- a/Tests/test_file_libtiff.py
+++ b/Tests/test_file_libtiff.py
@ -107,6 +107,29 @@ def test_adobe_deflate_tiff():
    assert_equal(im.tile[0][:3], ('tiff_adobe_deflate', (0, 0, 278, 374), 0))
    assert_no_exception(lambda: im.load())

+def test_write_metadata():
+    """ Test metadata writing through libtiff """
+    img = Image.open('Tests/images/lena_g4.tif')
+    f = tempfile('temp.tiff')
+
+    img.save(f, tiffinfo = img.tag)
+
+    loaded = Image.open(f)
+
+    original = img.tag.named()
+    reloaded = loaded.tag.named()
+
+    # PhotometricInterpretation is set from SAVE_INFO, not the original image. 
+    ignored = ['StripByteCounts', 'RowsPerStrip', 'PageNumber', 'PhotometricInterpretation']
+
+    for tag, value in reloaded.items():
+        if tag not in ignored:
+            assert_equal(original[tag], value, "%s didn't roundtrip" % tag)
+
+    for tag, value in original.items():
+        if tag not in ignored: 
+            assert_equal(value, reloaded[tag], "%s didn't roundtrip" % tag)
+

 def test_g3_compression():
    i = Image.open('Tests/images/lena_g4_500.tif')
@ -116,7 +139,7 @@ def test_g3_compression():
    reread = Image.open(out)
    assert_equal(reread.info['compression'], 'group3')
    assert_image_equal(reread, i)
-    
+
 def test_little_endian():
    im = Image.open('Tests/images/12bit.deflate.tif')
    assert_equal(im.getpixel((0,0)), 480)
--- a/Tests/test_file_tiff_metadata.py
+++ b/Tests/test_file_tiff_metadata.py
@ -0,0 +1,80 @@
+from tester import *
+from PIL import Image, TiffImagePlugin, TiffTags
+
+tag_ids = dict(zip(TiffTags.TAGS.values(), TiffTags.TAGS.keys()))
+
+def test_rt_metadata():
+    """ Test writing arbitrary metadata into the tiff image directory
+        Use case is ImageJ private tags, one numeric, one arbitrary
+        data.  https://github.com/python-imaging/Pillow/issues/291
+        """
+    
+    img = lena()
+
+    textdata = "This is some arbitrary metadata for a text field"
+    info = TiffImagePlugin.ImageFileDirectory()
+
+    info[tag_ids['ImageJMetaDataByteCounts']] = len(textdata)
+    info[tag_ids['ImageJMetaData']] = textdata
+
+    f = tempfile("temp.tif")
+
+    img.save(f, tiffinfo=info)
+    
+    loaded = Image.open(f)
+
+    assert_equal(loaded.tag[50838], (len(textdata),))
+    assert_equal(loaded.tag[50839], textdata)
+    
+def test_read_metadata():
+    img = Image.open('Tests/images/lena_g4.tif')
+    
+    known = {'YResolution': ((1207959552, 16777216),),
+             'PlanarConfiguration': (1,),
+             'BitsPerSample': (1,),
+             'ImageLength': (128,),
+             'Compression': (4,),
+             'FillOrder': (1,),
+             'DocumentName': 'lena.g4.tif',
+             'RowsPerStrip': (128,),
+             'ResolutionUnit': (1,),
+             'PhotometricInterpretation': (0,),
+             'PageNumber': (0, 1),
+             'XResolution': ((1207959552, 16777216),),
+             'ImageWidth': (128,),
+             'Orientation': (1,),
+             'StripByteCounts': (1796,),
+             'SamplesPerPixel': (1,),
+             'StripOffsets': (8,),
+             'Software': 'ImageMagick 6.5.7-8 2012-08-17 Q16 http://www.imagemagick.org'}
+
+    # a single assert_equal(known, named) would be equivalent, but is
+    # less helpful in telling what's wrong.
+    named = img.tag.named()
+    for tag, value in named.items():
+        assert_equal(known[tag], value)
+
+    for tag, value in known.items():
+        assert_equal(value, named[tag])
+
+
+def test_write_metadata():
+    """ Test metadata writing through the python code """
+    img = Image.open('Tests/images/lena.tif')
+
+    f = tempfile('temp.tiff')
+    img.save(f, tiffinfo = img.tag)
+
+    loaded = Image.open(f)
+
+    original = img.tag.named()
+    reloaded = loaded.tag.named()
+
+    ignored = ['StripByteCounts', 'RowsPerStrip', 'PageNumber', 'StripOffsets']
+    
+    for tag, value in reloaded.items():
+        if tag not in ignored:
+            assert_equal(original[tag], value, "%s didn't roundtrip" % tag)
+
+    for tag, value in original.items():
+        if tag not in ignored: 
+            assert_equal(value, reloaded[tag], "%s didn't roundtrip" % tag)
--- a/docs/handbook/image-file-formats.rst
+++ b/docs/handbook/image-file-formats.rst
@ -279,6 +279,57 @@ dictionary of decoded TIFF fields. Values are stored as either strings or
 tuples. Note that only short, long and ASCII tags are correctly unpacked by
 this release.

+Saving Tiff Images
+~~~~~~~~~~~~~~~~~~
+
+The :py:meth:`~PIL.Image.Image.save` method can take the following keyword arguments:
+
+**tiffinfo** 
+    A :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory` object or dict
+    object containing tiff tags and values. The TIFF field type is
+    autodetected for numeric and string values; any other types
+    require using an :py:class:`~PIL.TiffImagePlugin.ImageFileDirectory`
+    object and setting the type in
+    :py:attr:`~PIL.TiffImagePlugin.ImageFileDirectory.tagtype` with
+    the appropriate numerical value from
+    ``TiffTags.TYPES``.
+ 
+    .. versionadded:: 2.3.0
+
+**compression**
+    A string containing the desired compression method for the
+    file (valid only with libtiff installed). Valid compression
+    methods are: ``[None, "tiff_ccitt", "group3", "group4",
+    "tiff_jpeg", "tiff_adobe_deflate", "tiff_thunderscan",
+    "tiff_deflate", "tiff_sgilog", "tiff_sgilog24", "tiff_raw_16"]``
+
+The following arguments set individual tiff header fields and are an alternative to using the general tags available through ``tiffinfo``.
+
+**description** 
+
+**software**
+
+**date time**
+
+**artist**
+
+**copyright**
+    Strings
+
+**resolution unit**
+    A string of "inch", "centimeter" or "cm" 
+
+**resolution**
+
+**x resolution**
+
+**y resolution**
+
+**dpi**
+    Either a float, an integer, or a 2-tuple of (numerator,
+    denominator). ``resolution`` implies an equal x and y resolution;
+    ``dpi`` also implies a unit of inches.
+
 WebP
 ^^^^