2010-07-31 06:52:47 +04:00
|
|
|
#
|
|
|
|
# The Python Imaging Library.
|
|
|
|
# $Id$
|
|
|
|
#
|
|
|
|
# PDF (Acrobat) file handling
|
|
|
|
#
|
|
|
|
# History:
|
|
|
|
# 1996-07-16 fl Created
|
|
|
|
# 1997-01-18 fl Fixed header
|
|
|
|
# 2004-02-21 fl Fixes for 1/L/CMYK images, etc.
|
|
|
|
# 2004-02-24 fl Fixes for 1 and P images.
|
|
|
|
#
|
|
|
|
# Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.
|
|
|
|
# Copyright (c) 1996-1997 by Fredrik Lundh.
|
|
|
|
#
|
|
|
|
# See the README file for information on usage and redistribution.
|
|
|
|
#
|
|
|
|
|
|
|
|
##
|
|
|
|
# Image plugin for PDF images (output only).
|
|
|
|
##
|
|
|
|
|
2012-10-17 07:01:19 +04:00
|
|
|
import io
|
2022-08-01 13:38:47 +03:00
|
|
|
import math
|
2018-07-29 07:49:58 +03:00
|
|
|
import os
|
2018-07-29 15:33:59 +03:00
|
|
|
import time
|
2010-07-31 06:52:47 +04:00
|
|
|
|
2022-08-22 12:57:33 +03:00
|
|
|
from . import Image, ImageFile, ImageSequence, PdfParser, __version__, features
|
2010-07-31 06:52:47 +04:00
|
|
|
|
|
|
|
#
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
|
|
|
|
# object ids:
|
|
|
|
# 1. catalogue
|
|
|
|
# 2. pages
|
|
|
|
# 3. image
|
|
|
|
# 4. page
|
|
|
|
# 5. page contents
|
|
|
|
|
2014-05-12 15:56:55 +04:00
|
|
|
|
2015-09-29 15:51:52 +03:00
|
|
|
def _save_all(im, fp, filename):
    """Save handler for multi-page output: delegate to ``_save`` with
    ``save_all=True``.

    The arguments are forwarded to ``_save`` unchanged.
    """
    _save(im, fp, filename, save_all=True)
|
|
|
|
|
|
|
|
|
2010-07-31 06:52:47 +04:00
|
|
|
##
|
|
|
|
# (Internal) Image save plugin for the PDF format.
|
|
|
|
|
2019-03-21 16:28:20 +03:00
|
|
|
|
2015-09-29 15:51:52 +03:00
|
|
|
def _get_image_parameters(im):
    """Choose the PDF stream filter and image dictionary values for ``im``.

    :param im: The image (single frame) that is about to be embedded.
    :returns: A tuple ``(decode_filter, bits, params, decode, colorspace,
        procset)`` where ``decode_filter`` is the PDF filter name as a plain
        string, ``bits`` is the BitsPerComponent value, ``params`` is the
        DecodeParms value or None, ``decode`` is the Decode array or None,
        ``colorspace`` is the ColorSpace value and ``procset`` is the name of
        the procedure set to list in the page resources.
    :raises ValueError: If the image mode is not one of "1", "L", "P", "RGB",
        "RGBA" or "CMYK".
    """
    # FIXME: Should replace ASCIIHexDecode with RunLengthDecode
    # (packbits) or LZWDecode (tiff/lzw compression).  Note that
    # PDF 1.2 also supports Flatedecode (zip compression).

    width, height = im.size
    bits = 8
    params = None
    decode = None

    if im.mode == "1":
        if features.check("libtiff"):
            # Bilevel data can be stored as a Group 4 fax stream when
            # libtiff is available to produce it.
            decode_filter = "CCITTFaxDecode"
            bits = 1
            params = PdfParser.PdfArray(
                [
                    PdfParser.PdfDict(
                        {
                            "K": -1,
                            "BlackIs1": True,
                            "Columns": width,
                            "Rows": height,
                        }
                    )
                ]
            )
        else:
            decode_filter = "DCTDecode"
        colorspace = PdfParser.PdfName("DeviceGray")
        procset = "ImageB"  # grayscale
    elif im.mode == "L":
        decode_filter = "DCTDecode"
        colorspace = PdfParser.PdfName("DeviceGray")
        procset = "ImageB"  # grayscale
    elif im.mode == "P":
        decode_filter = "ASCIIHexDecode"
        palette = im.getpalette()
        colorspace = [
            PdfParser.PdfName("Indexed"),
            PdfParser.PdfName("DeviceRGB"),
            # hival is the highest valid palette index.  Derive it from the
            # palette that is actually written (three bytes per RGB entry)
            # so that it never points past the end of the lookup table.
            len(palette) // 3 - 1,
            PdfParser.PdfBinary(palette),
        ]
        procset = "ImageI"  # indexed color
    elif im.mode == "RGB":
        decode_filter = "DCTDecode"
        colorspace = PdfParser.PdfName("DeviceRGB")
        procset = "ImageC"  # color images
    elif im.mode == "RGBA":
        decode_filter = "JPXDecode"
        colorspace = PdfParser.PdfName("DeviceRGB")
        procset = "ImageC"  # color images
    elif im.mode == "CMYK":
        decode_filter = "DCTDecode"
        colorspace = PdfParser.PdfName("DeviceCMYK")
        procset = "ImageC"  # color images
        # NOTE(review): the inverted Decode array presumably compensates for
        # the polarity in which the JPEG encoder stores CMYK - confirm.
        decode = [1, 0, 1, 0, 1, 0, 1, 0]
    else:
        msg = f"cannot save mode {im.mode}"
        raise ValueError(msg)

    return decode_filter, bits, params, decode, colorspace, procset


def _encode_image_data(im, decode_filter, filename):
    """Encode the pixel data of ``im`` for the given PDF stream filter.

    :param im: The image (single frame) to encode.
    :param decode_filter: PDF filter name as a plain string.
    :param filename: Passed through to the JPEG / JPEG 2000 save handlers.
    :returns: The encoded stream as bytes.
    :raises ValueError: If ``decode_filter`` is not a supported filter name.
    """
    op = io.BytesIO()
    full_extent = (0, 0) + im.size

    if decode_filter == "ASCIIHexDecode":
        ImageFile._save(im, op, [("hex", full_extent, 0, im.mode)])
    elif decode_filter == "CCITTFaxDecode":
        im.save(
            op,
            "TIFF",
            compression="group4",
            # use a single strip
            strip_size=math.ceil(im.width / 8) * im.height,
        )
    elif decode_filter == "DCTDecode":
        Image.SAVE["JPEG"](im, op, filename)
    elif decode_filter == "JPXDecode":
        Image.SAVE["JPEG2000"](im, op, filename)
    elif decode_filter == "FlateDecode":
        ImageFile._save(im, op, [("zip", full_extent, 0, im.mode)])
    elif decode_filter == "RunLengthDecode":
        ImageFile._save(im, op, [("packbits", full_extent, 0, im.mode)])
    else:
        msg = f"unsupported PDF filter ({decode_filter})"
        raise ValueError(msg)

    stream = op.getvalue()
    if decode_filter == "CCITTFaxDecode":
        # Drop the first 8 bytes of the TIFF output (presumably the TIFF file
        # header) so that the stream starts at the strip data.
        stream = stream[8:]
    return stream


def _save(im, fp, filename, save_all=False):
    """Write ``im`` to ``fp`` as a PDF document, one page per image frame.

    The following ``im.encoderinfo`` entries are read:

    * ``append`` - if true, the file is opened in "r+b" mode instead of
      "w+b", and no default title or dates are supplied.
    * ``dpi`` - an ``(x, y)`` pair; if missing or falsy, ``resolution``
      (default 72.0) is used for both axes.
    * ``title``, ``author``, ``subject``, ``keywords``, ``creator``,
      ``producer``, ``creationDate``, ``modDate`` - document information;
      truthy values are stored with the first letter upper-cased.
    * ``append_images`` - further images to add as pages (only when
      ``save_all`` is true).  Their ``encoderinfo`` attribute is replaced by a
      copy of ``im.encoderinfo``.

    :param im: The image to save.
    :param fp: The file object handed to ``PdfParser.PdfParser``.
    :param filename: The target file name; its base name without extension
        becomes the default title.
    :param save_all: If true, every frame of ``im`` and of each image in
        ``append_images`` becomes a page; otherwise only the current frame of
        ``im`` is written.
    :raises ValueError: If a frame has an unsupported mode.
    """
    is_appending = im.encoderinfo.get("append", False)
    existing_pdf = PdfParser.PdfParser(
        f=fp, filename=filename, mode="r+b" if is_appending else "w+b"
    )

    # Make sure the parser is closed even if writing fails part way through.
    try:
        dpi = im.encoderinfo.get("dpi")
        if dpi:
            x_resolution = dpi[0]
            y_resolution = dpi[1]
        else:
            x_resolution = y_resolution = im.encoderinfo.get("resolution", 72.0)

        # Use one timestamp so the default creation and modification dates
        # are always identical.
        now = None if is_appending else time.gmtime()
        info = {
            "title": None
            if is_appending
            else os.path.splitext(os.path.basename(filename))[0],
            "author": None,
            "subject": None,
            "keywords": None,
            "creator": None,
            "producer": None,
            "creationDate": now,
            "modDate": now,
        }
        for key, default in info.items():
            value = im.encoderinfo.get(key, default)
            if value:
                existing_pdf.info[key[0].upper() + key[1:]] = value

        #
        # make sure image data is available
        im.load()

        existing_pdf.start_writing()
        existing_pdf.write_header()
        existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")

        #
        # pages
        ims = [im]
        if save_all:
            for append_im in im.encoderinfo.get("append_images", []):
                append_im.encoderinfo = im.encoderinfo.copy()
                ims.append(append_im)

        # Reserve three object ids (image, page, page contents) per page up
        # front so that the page list is complete when the catalog is written.
        image_refs = []
        page_refs = []
        contents_refs = []
        for source in ims:
            # Images without n_frames are single frame images.
            frame_count = getattr(source, "n_frames", 1) if save_all else 1
            for _ in range(frame_count):
                image_refs.append(existing_pdf.next_object_id(0))
                page_refs.append(existing_pdf.next_object_id(0))
                contents_refs.append(existing_pdf.next_object_id(0))
                existing_pdf.pages.append(page_refs[-1])

        #
        # catalog and list of pages
        existing_pdf.write_catalog()

        page_number = 0
        for source in ims:
            frames = ImageSequence.Iterator(source) if save_all else [source]
            for frame in frames:
                #
                # Get image characteristics
                width, height = frame.size
                (
                    decode_filter,
                    bits,
                    params,
                    decode,
                    colorspace,
                    procset,
                ) = _get_image_parameters(frame)

                #
                # image
                stream = _encode_image_data(frame, decode_filter, filename)
                if decode_filter == "CCITTFaxDecode":
                    # Written as a one-element array, matching the array form
                    # of the decode parameters chosen for this filter.
                    pdf_filter = PdfParser.PdfArray(
                        [PdfParser.PdfName(decode_filter)]
                    )
                else:
                    pdf_filter = PdfParser.PdfName(decode_filter)

                # Width and Height are the pixel dimensions; the physical
                # page size is applied through the MediaBox and the content
                # stream below.
                # NOTE(review): Decode / DecodeParms may be None here;
                # presumably PdfParser omits None-valued entries - verify.
                existing_pdf.write_obj(
                    image_refs[page_number],
                    stream=stream,
                    Type=PdfParser.PdfName("XObject"),
                    Subtype=PdfParser.PdfName("Image"),
                    Width=width,
                    Height=height,
                    Filter=pdf_filter,
                    BitsPerComponent=bits,
                    Decode=decode,
                    DecodeParms=params,
                    ColorSpace=colorspace,
                )

                #
                # page
                # Page size in points (1/72 inch) at the requested resolution.
                page_width = width * 72.0 / x_resolution
                page_height = height * 72.0 / y_resolution

                existing_pdf.write_page(
                    page_refs[page_number],
                    Resources=PdfParser.PdfDict(
                        ProcSet=[
                            PdfParser.PdfName("PDF"),
                            PdfParser.PdfName(procset),
                        ],
                        XObject=PdfParser.PdfDict(image=image_refs[page_number]),
                    ),
                    MediaBox=[0, 0, page_width, page_height],
                    Contents=contents_refs[page_number],
                )

                #
                # page contents
                # Scale the unit square to the page size and draw the image.
                page_contents = b"q %f 0 0 %f 0 0 cm /image Do Q\n" % (
                    page_width,
                    page_height,
                )
                existing_pdf.write_obj(
                    contents_refs[page_number], stream=page_contents
                )

                page_number += 1

        #
        # trailer
        existing_pdf.write_xref_and_trailer()
        if hasattr(fp, "flush"):
            fp.flush()
    finally:
        existing_pdf.close()
|
2010-07-31 06:52:47 +04:00
|
|
|
|
2019-03-21 16:28:20 +03:00
|
|
|
|
2010-07-31 06:52:47 +04:00
|
|
|
#
|
|
|
|
# --------------------------------------------------------------------
|
|
|
|
|
2018-03-03 12:54:00 +03:00
|
|
|
|
2010-07-31 06:52:47 +04:00
|
|
|
# Register the save handlers for the "PDF" format: _save for ordinary saves
# and _save_all for saves that write every frame.
Image.register_save("PDF", _save)
Image.register_save_all("PDF", _save_all)

# Associate the ".pdf" file extension with the "PDF" format.
Image.register_extension("PDF", ".pdf")

# Associate the "application/pdf" MIME type with the "PDF" format.
Image.register_mime("PDF", "application/pdf")
|