2010-07-31 06:52:47 +04:00
|
|
|
#
|
|
|
|
# The Python Imaging Library.
|
|
|
|
# $Id$
|
|
|
|
#
|
|
|
|
# IPTC/NAA file handling
|
|
|
|
#
|
|
|
|
# history:
|
|
|
|
# 1995-10-01 fl Created
|
|
|
|
# 1998-03-09 fl Cleaned up and added to PIL
|
|
|
|
# 2002-06-18 fl Added getiptcinfo helper
|
|
|
|
#
|
|
|
|
# Copyright (c) Secret Labs AB 1997-2002.
|
|
|
|
# Copyright (c) Fredrik Lundh 1995.
|
|
|
|
#
|
|
|
|
# See the README file for information on usage and redistribution.
|
|
|
|
#
|
|
|
|
|
2012-10-16 06:27:35 +04:00
|
|
|
from __future__ import print_function
|
2010-07-31 06:52:47 +04:00
|
|
|
|
2013-03-07 20:20:28 +04:00
|
|
|
from PIL import Image, ImageFile, _binary
|
2014-07-30 20:39:11 +04:00
|
|
|
import os
|
|
|
|
import tempfile
|
2010-07-31 06:52:47 +04:00
|
|
|
|
2015-08-25 15:27:18 +03:00
|
|
|
# Version string of this plugin module.
__version__ = "0.3"
|
|
|
|
|
py3k: The big push
There are two main issues fixed with this commit:
* bytes vs. str: All file, image, and palette data are now handled as
bytes. A new _binary module consolidates the hacks needed to do this
across Python versions. tostring/fromstring methods have been renamed to
tobytes/frombytes, but the Python 2.6/2.7 versions alias them to the old
names for compatibility. Users should move to tobytes/frombytes.
One other potentially-breaking change is that text data in image files
(such as tags, comments) are now explicitly handled with a specific
character encoding in mind. This works well with the Unicode str in
Python 3, but may trip up old code expecting a straight byte-for-byte
translation to a Python string. This also required a change to Gohlke's
tags tests (in Tests/test_file_png.py) to expect Unicode strings from
the code.
* True div vs. floor div: Many division operations used the "/" operator
to do floor division, which is now the "//" operator in Python 3. These
were fixed.
As of this commit, on the first pass, I have one failing test (improper
handling of a slice object in a C module, test_imagepath.py) in Python 3,
and three that I haven't tried running yet (test_imagegl,
test_imagegrab, and test_imageqt). I also haven't tested anything on
Windows. All but the three skipped tests run flawlessly against Pythons
2.6 and 2.7.
2012-10-21 01:01:53 +04:00
|
|
|
# Short aliases for the byte-level helpers in PIL._binary.
# i16 and i32 are bound to the "be" variants (i16be / i32be).
i8 = _binary.i8
i16 = _binary.i16be
i32 = _binary.i32be
o8 = _binary.o8
|
2010-07-31 06:52:47 +04:00
|
|
|
|
|
|
|
# Maps the integer stored in record (3, 120) to the encoding name that
# IptcImageFile._open() places in the tile descriptor.
COMPRESSION = {1: "raw", 5: "jpeg"}
|
|
|
|
|
py3k: The big push
There are two main issues fixed with this commit:
* bytes vs. str: All file, image, and palette data are now handled as
bytes. A new _binary module consolidates the hacks needed to do this
across Python versions. tostring/fromstring methods have been renamed to
tobytes/frombytes, but the Python 2.6/2.7 versions alias them to the old
names for compatibility. Users should move to tobytes/frombytes.
One other potentially-breaking change is that text data in image files
(such as tags, comments) are now explicitly handled with a specific
character encoding in mind. This works well with the Unicode str in
Python 3, but may trip up old code expecting a straight byte-for-byte
translation to a Python string. This also required a change to Gohlke's
tags tests (in Tests/test_file_png.py) to expect Unicode strings from
the code.
* True div vs. floor div: Many division operations used the "/" operator
to do floor division, which is now the "//" operator in Python 3. These
were fixed.
As of this commit, on the first pass, I have one failing test (improper
handling of a slice object in a C module, test_imagepath.py) in Python 3,
and three that I haven't tried running yet (test_imagegl,
test_imagegrab, and test_imageqt). I also haven't tested anything on
Windows. All but the three skipped tests run flawlessly against Pythons
2.6 and 2.7.
2012-10-21 01:01:53 +04:00
|
|
|
# o8(0) repeated four times -- presumably four NUL bytes (confirm against
# _binary.o8). i() prepends this so short values can be read as 32 bits.
PAD = o8(0) * 4
|
2010-07-31 06:52:47 +04:00
|
|
|
|
2014-07-30 20:39:11 +04:00
|
|
|
|
2010-07-31 06:52:47 +04:00
|
|
|
#
|
|
|
|
# Helpers
|
|
|
|
|
|
|
|
def i(c):
    """Decode a byte string of arbitrary length as an integer.

    PAD is prepended to *c* and only the last four bytes of the result
    are handed to ``i32`` (bound to ``_binary.i32be``), so values shorter
    than four bytes are left-padded and longer ones keep only their
    trailing four bytes.
    """
    padded = PAD + c
    return i32(padded[-4:])
|
|
|
|
|
2014-07-30 20:39:11 +04:00
|
|
|
|
2010-07-31 06:52:47 +04:00
|
|
|
def dump(c):
    """Print every element of *c* as two-digit hex, then end the line.

    Each value is passed through ``i8`` and followed by a single space;
    a bare ``print()`` terminates the line afterwards.
    """
    for item in c:
        print("%02x" % i8(item), end=' ')
    print()
|
2010-07-31 06:52:47 +04:00
|
|
|
|
2014-07-30 20:39:11 +04:00
|
|
|
|
2010-07-31 06:52:47 +04:00
|
|
|
##
|
|
|
|
# Image plugin for IPTC/NAA datastreams. To read IPTC/NAA fields
|
|
|
|
# from TIFF and JPEG files, use the <b>getiptcinfo</b> function.
|
|
|
|
|
|
|
|
class IptcImageFile(ImageFile.ImageFile):
    """Image plugin for IPTC/NAA datastreams.

    ``_open`` collects every field preceding the first (8, 10) field into
    ``self.info`` (keyed by ``(record, dataset)`` tuples) and derives mode,
    size and compression from records (3, 60), (3, 65), (3, 20), (3, 30)
    and (3, 120). ``load`` copies the (8, 10) fields to a temporary file
    and decodes that file.
    """

    format = "IPTC"
    format_description = "IPTC/NAA"

    def getint(self, key):
        """Return ``self.info[key]`` decoded as an integer by ``i()``."""
        return i(self.info[key])

    def field(self):
        """Read one field header from ``self.fp``.

        Returns:
            ``(tag, size)`` where *tag* is a ``(record, dataset)`` tuple
            and *size* the length of the field data that follows, or
            ``(None, 0)`` when nothing could be read.

        Raises:
            SyntaxError: the marker byte is not 0x1C or the record number
                is outside 1..9.
            IOError: the length byte is greater than 132.
        """
        #
        # get a IPTC field header
        s = self.fp.read(5)
        if not len(s):
            return None, 0

        # NOTE: a truncated (1-4 byte) header surfaces as IndexError from
        # the indexing below; getiptcinfo() treats IndexError as an
        # expected failure, so this is deliberately not converted.
        tag = i8(s[1]), i8(s[2])

        # syntax
        if i8(s[0]) != 0x1C or tag[0] < 1 or tag[0] > 9:
            raise SyntaxError("invalid IPTC/NAA file")

        # field size
        size = i8(s[3])
        if size > 132:
            raise IOError("illegal field length in IPTC/NAA file")
        elif size == 128:
            size = 0
        elif size > 128:
            # extended form: the next (size - 128) bytes hold the length
            size = i(self.fp.read(size-128))
        else:
            # short form: 16-bit length stored in header bytes 3 and 4
            size = i16(s[3:])

        return tag, size

    def _open(self):
        """Parse the descriptive fields and set mode, size and tile.

        Raises:
            KeyError: a required record such as (3, 60) is missing.
            IOError: the compression record is missing or not listed in
                COMPRESSION.
        """
        # load descriptive fields
        while True:
            offset = self.fp.tell()
            tag, size = self.field()
            if not tag or tag == (8, 10):
                break
            if size:
                tagdata = self.fp.read(size)
            else:
                tagdata = None
            if tag in self.info:
                # repeated tag: collect all values in a list
                if isinstance(self.info[tag], list):
                    self.info[tag].append(tagdata)
                else:
                    self.info[tag] = [self.info[tag], tagdata]
            else:
                self.info[tag] = tagdata

        # mode
        layers = i8(self.info[(3, 60)][0])
        component = i8(self.info[(3, 60)][1])
        if (3, 65) in self.info:
            band = i8(self.info[(3, 65)][0])-1
        else:
            band = 0
        if layers == 1 and not component:
            self.mode = "L"
        elif layers == 3 and component:
            self.mode = "RGB"[band]
        elif layers == 4 and component:
            self.mode = "CMYK"[band]

        # size
        self.size = self.getint((3, 20)), self.getint((3, 30))

        # compression
        try:
            compression = COMPRESSION[self.getint((3, 120))]
        except KeyError:
            raise IOError("Unknown IPTC image compression")

        # tile
        if tag == (8, 10):
            # offset is the position of the first (8, 10) field header
            self.tile = [("iptc", (compression, offset),
                         (0, 0, self.size[0], self.size[1]))]

    def load(self):
        """Load the pixel data into ``self.im``.

        Anything other than a single "iptc" tile is delegated to
        ``ImageFile.ImageFile.load``. Otherwise the consecutive (8, 10)
        fields are copied into a temporary file (with a PPM header in
        front for "raw" data), that file is decoded, and it is removed
        again whether or not decoding succeeded.
        """
        if len(self.tile) != 1 or self.tile[0][0] != "iptc":
            return ImageFile.ImageFile.load(self)

        encoding, offset = self.tile[0][1]

        self.fp.seek(offset)

        # Copy image data to temporary file
        o_fd, outfile = tempfile.mkstemp(text=False)
        try:
            # The descriptor must be wrapped for binary *writing*;
            # os.fdopen() defaults to read mode, in which write() fails.
            with os.fdopen(o_fd, "wb") as o:
                if encoding == "raw":
                    # To simplify access to the extracted file,
                    # prepend a PPM header (encoded: the file is binary)
                    header = "P5\n%d %d\n255\n" % self.size
                    o.write(header.encode("ascii"))
                while True:
                    tag, size = self.field()
                    if tag != (8, 10):
                        break
                    while size > 0:
                        s = self.fp.read(min(size, 8192))
                        if not s:
                            break
                        o.write(s)
                        size -= len(s)

            try:
                # fast
                self.im = Image.core.open_ppm(outfile)
            except Exception:
                # slightly slower
                im = Image.open(outfile)
                im.load()
                self.im = im.im
        finally:
            # best-effort cleanup; the file may still be held open
            try:
                os.unlink(outfile)
            except OSError:
                pass
|
2010-07-31 06:52:47 +04:00
|
|
|
|
|
|
|
|
2015-07-04 16:29:58 +03:00
|
|
|
# Register the plugin under its format name and associate the ".iim"
# file extension with that format.
Image.register_open(IptcImageFile.format, IptcImageFile)
Image.register_extension(IptcImageFile.format, ".iim")
|
2010-07-31 06:52:47 +04:00
|
|
|
|
2014-07-30 20:39:11 +04:00
|
|
|
|
2010-07-31 06:52:47 +04:00
|
|
|
##
|
|
|
|
# Get IPTC information from TIFF, JPEG, or IPTC file.
|
|
|
|
#
|
|
|
|
# @param im An image containing IPTC data.
|
|
|
|
# @return A dictionary containing IPTC information, or None if
|
|
|
|
# no IPTC information block was found.
|
|
|
|
|
|
|
|
def getiptcinfo(im):
    """Return the IPTC info dictionary for *im*, or None.

    An IptcImageFile hands back its own ``info`` dictionary. For a JPEG
    the raw IPTC block is taken from resource 0x0404 of the Photoshop
    APP13 segment; for a TIFF it is taken from the IPTC_NAA_CHUNK tag.
    The block is then parsed by an IptcImageFile that is created without
    running its initializer. None is returned when no block was found.
    """
    from PIL import TiffImagePlugin, JpegImagePlugin
    import io

    if isinstance(im, IptcImageFile):
        # nothing to extract: the plugin already parsed the fields
        return im.info

    data = None

    if isinstance(im, JpegImagePlugin.JpegImageFile):
        # extract the IPTC/NAA resource
        try:
            app = im.app["APP13"]
            if app[:14] == b"Photoshop 3.0\x00":
                app = app[14:]
                # walk the sequence of "8BIM" image resource blocks
                pos = 0
                while app[pos:pos+4] == b"8BIM":
                    pos += 4
                    # resource code
                    code = JpegImagePlugin.i16(app, pos)
                    pos += 2
                    # resource name (usually empty): length byte + name,
                    # rounded up to an even offset
                    name_len = i8(app[pos])
                    pos += 1 + name_len
                    pos += pos & 1
                    # resource data block
                    size = JpegImagePlugin.i32(app, pos)
                    pos += 4
                    if code == 0x0404:
                        # 0x0404 contains IPTC/NAA data
                        data = app[pos:pos+size]
                        break
                    # skip the data, again rounded up to an even offset
                    pos += size
                    pos += pos & 1
        except (AttributeError, KeyError):
            pass

    elif isinstance(im, TiffImagePlugin.TiffImageFile):
        # get raw data from the IPTC/NAA tag (PhotoShop tags the data
        # as 4-byte integers, so we cannot use the get method...)
        try:
            data = im.tag.tagdata[TiffImagePlugin.IPTC_NAA_CHUNK]
        except (AttributeError, KeyError):
            pass

    if data is None:
        return None  # no properties

    # create an IptcImagePlugin object without initializing it
    class FakeImage(object):
        pass
    parser = FakeImage()
    parser.__class__ = IptcImageFile

    # parse the IPTC information chunk
    parser.info = {}
    parser.fp = io.BytesIO(data)

    try:
        parser._open()
    except (IndexError, KeyError):
        pass  # expected failure

    return parser.info
|