2010-07-31 06:52:47 +04:00
|
|
|
#
|
|
|
|
# The Python Imaging Library.
|
|
|
|
# $Id$
|
|
|
|
#
|
|
|
|
# PDF (Acrobat) file handling
|
|
|
|
#
|
|
|
|
# History:
|
|
|
|
# 1996-07-16 fl Created
|
|
|
|
# 1997-01-18 fl Fixed header
|
|
|
|
# 2004-02-21 fl Fixes for 1/L/CMYK images, etc.
|
|
|
|
# 2004-02-24 fl Fixes for 1 and P images.
|
|
|
|
#
|
|
|
|
# Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.
|
|
|
|
# Copyright (c) 1996-1997 by Fredrik Lundh.
|
|
|
|
#
|
|
|
|
# See the README file for information on usage and redistribution.
|
|
|
|
#
|
|
|
|
|
|
|
|
##
|
|
|
|
# Image plugin for PDF images (output only).
|
|
|
|
##
|
|
|
|
|
|
|
|
# Version of this PDF writer; _save embeds it in the "% created by PIL PDF
# driver" comment line near the top of every file it produces.
__version__ = "0.4"
|
|
|
|
|
2013-03-07 20:20:28 +04:00
|
|
|
from PIL import Image, ImageFile
|
|
|
|
from PIL._binary import i8
|
2012-10-17 07:01:19 +04:00
|
|
|
import io
|
2010-07-31 06:52:47 +04:00
|
|
|
|
|
|
|
|
|
|
|
#
# --------------------------------------------------------------------

# object ids:
#  1. catalogue
#  2. pages
#  3. image
#  4. page
#  5. page contents
|
|
|
|
|
|
|
|
def _obj(fp, obj, **entries):
    """Write the opening of indirect PDF object number *obj* to *fp*.

    Emits the ``"<obj> 0 obj"`` line. When keyword arguments are given, a
    ``<< ... >>`` dictionary follows, with one ``/Key value`` line per
    argument; arguments whose value is ``None`` are left out.

    :param fp: object with a ``write`` method that accepts text.
    :param obj: object number (formatted with ``%d``).
    :param entries: dictionary entries; keys become PDF names, values are
        formatted with ``%s``.
    """
    fp.write("%d 0 obj\n" % obj)
    if not entries:
        # no dictionary requested; the caller writes the object body itself
        return
    fp.write("<<\n")
    for key, value in entries.items():
        if value is not None:
            fp.write("/%s %s\n" % (key, value))
    fp.write(">>\n")
|
2010-07-31 06:52:47 +04:00
|
|
|
|
|
|
|
def _endobj(fp):
    """Write the ``endobj`` line that closes the current PDF object to *fp*."""
    fp.write("endobj\n")
|
2010-07-31 06:52:47 +04:00
|
|
|
|
|
|
|
##
|
|
|
|
# (Internal) Image save plugin for the PDF format.
|
|
|
|
|
|
|
|
def _save(im, fp, filename):
    """Write *im* to the binary file object *fp* as a one-page PDF.

    The document consists of five objects (catalogue, pages, image, page,
    page contents) followed by the cross-reference table and the trailer.

    :param im: image to save. ``im.encoderinfo`` may carry a "resolution"
        entry (default 72.0); pixel dimensions are multiplied by
        ``72.0 / resolution`` to obtain the page (MediaBox) size.
    :param fp: output file; must accept bytes and provide ``tell()`` and
        ``flush()``.
    :param filename: passed on to the JPEG writer for DCT-encoded modes.
    :raises ValueError: if the image mode is not 1, L, P, RGB or CMYK, or
        if the selected filter has no encoder branch. The mode is checked
        before anything is written to *fp*.
    """
    resolution = im.encoderinfo.get("resolution", 72.0)

    # make sure image data is available
    im.load()

    # Resolve the mode-dependent settings before producing any output, so
    # an unsupported mode does not leave a partial header in the file.
    pdf_filter, colorspace, procset, bits = _pdf_image_params(im)

    width, height = im.size
    page_width = int(width * 72.0 / resolution)
    page_height = int(height * 72.0 / resolution)

    # byte offsets of objects 1-5; slot 0 stays a placeholder because the
    # first table entry is written as a fixed string below
    xref = [0] * (5 + 1)

    class TextWriter:
        """Wrap a binary file so text can be written as latin-1 bytes."""

        def __init__(self, fp):
            self.fp = fp

        def __getattr__(self, name):
            # everything except write() is delegated to the wrapped file
            return getattr(self.fp, name)

        def write(self, value):
            self.fp.write(value.encode('latin-1'))

    fp = TextWriter(fp)

    fp.write("%PDF-1.2\n")
    fp.write("% created by PIL PDF driver " + __version__ + "\n")

    #
    # catalogue

    xref[1] = fp.tell()
    _obj(fp, 1, Type="/Catalog",
         Pages="2 0 R")
    _endobj(fp)

    #
    # pages

    xref[2] = fp.tell()
    _obj(fp, 2, Type="/Pages",
         Count=1,
         Kids="[4 0 R]")
    _endobj(fp)

    #
    # image

    # FIXME: Should replace ASCIIHexDecode with RunLengthDecode (packbits)
    # or LZWDecode (tiff/lzw compression).  Note that PDF 1.2 also supports
    # Flatedecode (zip compression).

    op = io.BytesIO()

    if pdf_filter == "/ASCIIHexDecode":
        if bits == 1:
            # FIXME: the hex encoder doesn't support packed 1-bit
            # images; do things the hard way...
            # (tobytes is the current name of the former tostring method)
            data = im.tobytes("raw", "1")
            im = Image.new("L", (len(data), 1), None)
            im.putdata(data)
        ImageFile._save(im, op, [("hex", (0, 0) + im.size, 0, im.mode)])
    elif pdf_filter == "/DCTDecode":
        Image.SAVE["JPEG"](im, op, filename)
    elif pdf_filter == "/FlateDecode":
        ImageFile._save(im, op, [("zip", (0, 0) + im.size, 0, im.mode)])
    elif pdf_filter == "/RunLengthDecode":
        ImageFile._save(im, op, [("packbits", (0, 0) + im.size, 0, im.mode)])
    else:
        raise ValueError("unsupported PDF filter (%s)" % pdf_filter)

    image_data = op.getvalue()

    xref[3] = fp.tell()
    _obj(fp, 3, Type="/XObject",
         Subtype="/Image",
         Width=width,
         Height=height,
         Length=len(image_data),
         Filter=pdf_filter,
         BitsPerComponent=bits,
         DecodeParams=None,  # no decode parameters; _obj skips None values
         ColorSpace=colorspace)

    fp.write("stream\n")
    fp.fp.write(image_data)  # already bytes: bypass the text encoder
    fp.write("\nendstream\n")

    _endobj(fp)

    #
    # page

    xref[4] = fp.tell()
    _obj(fp, 4)
    fp.write("<<\n/Type /Page\n/Parent 2 0 R\n"
             "/Resources <<\n/ProcSet [ /PDF %s ]\n"
             "/XObject << /image 3 0 R >>\n>>\n"
             "/MediaBox [ 0 0 %d %d ]\n/Contents 5 0 R\n>>\n" %
             (procset, page_width, page_height))
    _endobj(fp)

    #
    # page contents

    contents = ("q %d 0 0 %d 0 0 cm /image Do Q\n" %
                (page_width, page_height)).encode('latin-1')

    xref[5] = fp.tell()
    _obj(fp, 5, Length=len(contents))

    fp.write("stream\n")
    fp.fp.write(contents)
    fp.write("\nendstream\n")

    _endobj(fp)

    #
    # trailer

    startxref = fp.tell()
    fp.write("xref\n0 %d\n0000000000 65535 f \n" % len(xref))
    for offset in xref[1:]:
        fp.write("%010d 00000 n \n" % offset)
    fp.write("trailer\n<<\n/Size %d\n/Root 1 0 R\n>>\n" % len(xref))
    fp.write("startxref\n%d\n%%%%EOF\n" % startxref)
    fp.flush()


def _pdf_image_params(im):
    """Return ``(filter, colorspace, procset, bits)`` for the mode of *im*.

    All four values are the text written into the PDF objects, except
    *bits*, which is the integer BitsPerComponent value.

    :raises ValueError: if the mode is not 1, L, P, RGB or CMYK.
    """
    mode = im.mode
    if mode == "1":
        return "/ASCIIHexDecode", "/DeviceGray", "/ImageB", 1  # grayscale
    if mode == "L":
        return "/DCTDecode", "/DeviceGray", "/ImageB", 8  # grayscale
    if mode == "P":
        # The palette is embedded as hex text. Everything here must be str:
        # the result is later formatted into a text line by _obj.
        palette = im.im.getpalette("RGB")
        entries = [
            "%02x%02x%02x " % (i8(palette[i * 3]),
                               i8(palette[i * 3 + 1]),
                               i8(palette[i * 3 + 2]))
            for i in range(256)
        ]
        colorspace = "[ /Indexed /DeviceRGB 255 <" + "".join(entries) + "> ]"
        return "/ASCIIHexDecode", colorspace, "/ImageI", 8  # indexed color
    if mode == "RGB":
        return "/DCTDecode", "/DeviceRGB", "/ImageC", 8  # color images
    if mode == "CMYK":
        return "/DCTDecode", "/DeviceCMYK", "/ImageC", 8  # color images
    raise ValueError("cannot save mode %s" % im.mode)
|
|
|
|
|
|
|
|
#
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
|
|
|
|
# Register _save as the writer for the "PDF" format id, together with the
# format's file extension and MIME type.
Image.register_save("PDF", _save)
Image.register_extension("PDF", ".pdf")
Image.register_mime("PDF", "application/pdf")
|