# Pillow/src/PIL/PdfImagePlugin.py
#
# The Python Imaging Library.
# $Id$
#
# PDF (Acrobat) file handling
#
# History:
# 1996-07-16 fl Created
# 1997-01-18 fl Fixed header
# 2004-02-21 fl Fixes for 1/L/CMYK images, etc.
# 2004-02-24 fl Fixes for 1 and P images.
#
# Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.
# Copyright (c) 1996-1997 by Fredrik Lundh.
#
# See the README file for information on usage and redistribution.
#
##
# Image plugin for PDF images (output only).
##
import io
2018-07-29 07:49:58 +03:00
import os
import time
2010-07-31 06:52:47 +04:00
from . import Image, ImageFile, ImageSequence, PdfParser, __version__

#
# --------------------------------------------------------------------
# object ids:
# 1. catalogue
# 2. pages
# 3. image
# 4. page
# 5. page contents


def _save_all(im, fp, filename):
    """Save handler for multi-page output.

    Forwards its arguments unchanged to ``_save`` with ``save_all=True``.
    """
    _save(im, fp, filename, save_all=True)


##
# (Internal) Image save plugin for the PDF format.


def _save(im, fp, filename, save_all=False):
    """Write ``im`` to ``fp`` as a PDF document, one image per page.

    Options are read from ``im.encoderinfo``:

    * ``append`` -- open the target in ``"r+b"`` mode instead of ``"w+b"``;
      in that case no default title or dates are supplied.
    * ``resolution`` -- divisor used to scale pixel sizes into page units
      (``size * 72.0 / resolution``); defaults to ``72.0``.
    * ``title``, ``author``, ``subject``, ``keywords``, ``creator``,
      ``producer``, ``creationDate``, ``modDate`` -- document info entries;
      falsy values are not written.
    * ``append_images`` -- further images to add as pages (only consulted
      when ``save_all`` is true).

    :param im: Image to save; must carry an ``encoderinfo`` dict.
    :param fp: Target file object, handed to ``PdfParser.PdfParser``.
    :param filename: Target file name; its base name without extension is
        the default title, and it is passed on to the JPEG encoder.
    :param save_all: If true, every frame of ``im`` and of each image in
        ``append_images`` becomes a page; otherwise only ``im`` as-is.
    :raises ValueError: If a page image is not in mode ``1``, ``L``, ``P``,
        ``RGB`` or ``CMYK``.
    """
    is_appending = im.encoderinfo.get("append", False)
    parser_mode = "r+b" if is_appending else "w+b"
    existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode=parser_mode)

    try:
        resolution = im.encoderinfo.get("resolution", 72.0)

        # Default document info; when appending, nothing is supplied by
        # default so only explicitly passed values are written.
        info = {
            "title": None
            if is_appending
            else os.path.splitext(os.path.basename(filename))[0],
            "author": None,
            "subject": None,
            "keywords": None,
            "creator": None,
            "producer": None,
            "creationDate": None if is_appending else time.gmtime(),
            "modDate": None if is_appending else time.gmtime(),
        }
        for key, default in info.items():
            value = im.encoderinfo.get(key, default)
            if value:
                # Info keys are stored with an upper-cased first letter,
                # e.g. "title" -> "Title".
                existing_pdf.info[key[0].upper() + key[1:]] = value

        #
        # make sure image data is available
        im.load()

        existing_pdf.start_writing()
        existing_pdf.write_header()
        existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")

        #
        # pages
        ims = [im]
        if save_all:
            for append_im in im.encoderinfo.get("append_images", []):
                # Appended images are saved with the same options as ``im``.
                append_im.encoderinfo = im.encoderinfo.copy()
                ims.append(append_im)

        # Reserve three object ids (image, page, page contents) per page
        # before the catalog is written.
        image_refs = []
        page_refs = []
        contents_refs = []
        for source_im in ims:
            # Images without n_frames are treated as single-frame.
            frame_count = getattr(source_im, "n_frames", 1) if save_all else 1
            for _ in range(frame_count):
                image_refs.append(existing_pdf.next_object_id(0))
                page_refs.append(existing_pdf.next_object_id(0))
                contents_refs.append(existing_pdf.next_object_id(0))
                existing_pdf.pages.append(page_refs[-1])

        #
        # catalog and list of pages
        existing_pdf.write_catalog()

        page_number = 0
        for im_sequence in ims:
            im_pages = (
                ImageSequence.Iterator(im_sequence) if save_all else [im_sequence]
            )
            for frame in im_pages:
                # FIXME: Should replace ASCIIHexDecode with RunLengthDecode
                # (packbits) or LZWDecode (tiff/lzw compression).  Note that
                # PDF 1.2 also supports Flatedecode (zip compression).

                bits = 8
                params = None
                decode = None

                if frame.mode == "1":
                    stream_filter = "ASCIIHexDecode"
                    colorspace = PdfParser.PdfName("DeviceGray")
                    procset = "ImageB"  # grayscale
                    bits = 1
                elif frame.mode == "L":
                    stream_filter = "DCTDecode"
                    colorspace = PdfParser.PdfName("DeviceGray")
                    procset = "ImageB"  # grayscale
                elif frame.mode == "P":
                    stream_filter = "ASCIIHexDecode"
                    palette = frame.im.getpalette("RGB")
                    colorspace = [
                        PdfParser.PdfName("Indexed"),
                        PdfParser.PdfName("DeviceRGB"),
                        255,
                        PdfParser.PdfBinary(palette),
                    ]
                    procset = "ImageI"  # indexed color
                elif frame.mode == "RGB":
                    stream_filter = "DCTDecode"
                    colorspace = PdfParser.PdfName("DeviceRGB")
                    procset = "ImageC"  # color images
                elif frame.mode == "CMYK":
                    stream_filter = "DCTDecode"
                    colorspace = PdfParser.PdfName("DeviceCMYK")
                    procset = "ImageC"  # color images
                    # A [1 0] pair per component inverts that component.
                    # NOTE(review): presumably needed because the JPEG
                    # encoder stores CMYK inverted -- confirm.
                    decode = [1, 0, 1, 0, 1, 0, 1, 0]
                else:
                    raise ValueError(f"cannot save mode {frame.mode}")

                #
                # image
                op = io.BytesIO()

                if stream_filter == "ASCIIHexDecode":
                    if bits == 1:
                        # FIXME: the hex encoder doesn't support packed 1-bit
                        # images; do things the hard way...
                        data = frame.tobytes("raw", "1")
                        frame = Image.new("L", frame.size)
                        frame.putdata(data)
                    ImageFile._save(
                        frame, op, [("hex", (0, 0) + frame.size, 0, frame.mode)]
                    )
                elif stream_filter == "DCTDecode":
                    Image.SAVE["JPEG"](frame, op, filename)
                # The two filters below are not selected by any mode above
                # at present (see the FIXME at the top of the loop).
                elif stream_filter == "FlateDecode":
                    ImageFile._save(
                        frame, op, [("zip", (0, 0) + frame.size, 0, frame.mode)]
                    )
                elif stream_filter == "RunLengthDecode":
                    ImageFile._save(
                        frame, op, [("packbits", (0, 0) + frame.size, 0, frame.mode)]
                    )
                else:
                    raise ValueError(f"unsupported PDF filter ({stream_filter})")

                #
                # Get image characteristics
                width, height = frame.size

                existing_pdf.write_obj(
                    image_refs[page_number],
                    stream=op.getvalue(),
                    Type=PdfParser.PdfName("XObject"),
                    Subtype=PdfParser.PdfName("Image"),
                    Width=width,
                    Height=height,
                    Filter=PdfParser.PdfName(stream_filter),
                    BitsPerComponent=bits,
                    Decode=decode,
                    DecodeParams=params,
                    ColorSpace=colorspace,
                )

                # Page size in page units: pixels scaled by 72 / resolution,
                # truncated to whole units.
                page_width = int(width * 72.0 / resolution)
                page_height = int(height * 72.0 / resolution)

                #
                # page
                existing_pdf.write_page(
                    page_refs[page_number],
                    Resources=PdfParser.PdfDict(
                        ProcSet=[PdfParser.PdfName("PDF"), PdfParser.PdfName(procset)],
                        XObject=PdfParser.PdfDict(image=image_refs[page_number]),
                    ),
                    MediaBox=[0, 0, page_width, page_height],
                    Contents=contents_refs[page_number],
                )

                #
                # page contents: scale the image to the page size and draw it
                page_contents = b"q %d 0 0 %d 0 0 cm /image Do Q\n" % (
                    page_width,
                    page_height,
                )

                existing_pdf.write_obj(contents_refs[page_number], stream=page_contents)

                page_number += 1

        #
        # trailer
        existing_pdf.write_xref_and_trailer()
        if hasattr(fp, "flush"):
            fp.flush()
    finally:
        # Always release the parser, including when an error aborts the save.
        existing_pdf.close()


#
# --------------------------------------------------------------------

# Register the PDF writer with the Image module: the single-image and
# multi-page save handlers, the file extension and the MIME type.
Image.register_save("PDF", _save)
Image.register_save_all("PDF", _save_all)

Image.register_extension("PDF", ".pdf")

Image.register_mime("PDF", "application/pdf")