Pillow/src/PIL/PdfImagePlugin.py

#
# The Python Imaging Library.
# $Id$
#
# PDF (Acrobat) file handling
#
# History:
# 1996-07-16 fl   Created
# 1997-01-18 fl   Fixed header
# 2004-02-21 fl   Fixes for 1/L/CMYK images, etc.
# 2004-02-24 fl   Fixes for 1 and P images.
#
# Copyright (c) 1997-2004 by Secret Labs AB.  All rights reserved.
# Copyright (c) 1996-1997 by Fredrik Lundh.
#
# See the README file for information on usage and redistribution.
#

##
# Image plugin for PDF images (output only).
##

import io
import os
import time

from . import Image, ImageFile, ImageSequence, PdfParser, __version__

#
# --------------------------------------------------------------------

# object ids:
#  1. catalogue
#  2. pages
#  3. image
#  4. page
#  5. page contents


def _save_all(im, fp, filename):
    """Multi-page entry point: delegate to ``_save`` with ``save_all`` on."""
    # The fourth positional parameter of _save is ``save_all``.
    _save(im, fp, filename, True)


##
# (Internal) Image save plugin for the PDF format.


def _save(im, fp, filename, save_all=False):
    """Write ``im`` to ``fp`` as a PDF document, one image per page.

    :param im: Image to save.  Options are read from ``im.encoderinfo``:
        ``append``, ``resolution``, ``append_images`` (only consulted when
        ``save_all`` is true) and the document information keys ``title``,
        ``author``, ``subject``, ``keywords``, ``creator``, ``producer``,
        ``creationDate`` and ``modDate``.
    :param fp: File object handed to ``PdfParser.PdfParser``.
    :param filename: Name of the target file.  Unless appending, its base
        name without extension is the default document title.
    :param save_all: If true, every frame of ``im`` and of each image in
        ``append_images`` becomes its own page; otherwise ``im`` is written
        as a single page.
    :raises ValueError: If a frame's mode is not one of "1", "L", "P",
        "RGB" or "CMYK".
    """
    is_appending = im.encoderinfo.get("append", False)
    pdf_mode = "r+b" if is_appending else "w+b"
    existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode=pdf_mode)

    # Close the parser even when saving fails part-way (unsupported mode,
    # encoder error, ...); previously it was only closed on success.
    try:
        resolution = im.encoderinfo.get("resolution", 72.0)

        # Sample the clock once so that the default creation and
        # modification dates of a new document are identical.
        now = None if is_appending else time.gmtime()
        info_defaults = {
            "title": None
            if is_appending
            else os.path.splitext(os.path.basename(filename))[0],
            "author": None,
            "subject": None,
            "keywords": None,
            "creator": None,
            "producer": None,
            "creationDate": now,
            "modDate": now,
        }
        for key, default in info_defaults.items():
            value = im.encoderinfo.get(key, default)
            if value:
                # Info dictionary keys are capitalised: "title" -> "Title".
                existing_pdf.info[key[0].upper() + key[1:]] = value

        #
        # make sure image data is available
        im.load()

        existing_pdf.start_writing()
        existing_pdf.write_header()
        existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")

        #
        # pages
        ims = [im]
        if save_all:
            for append_im in im.encoderinfo.get("append_images", []):
                # Appended images share the encoder options of the first one.
                append_im.encoderinfo = im.encoderinfo.copy()
                ims.append(append_im)

        # Reserve three object ids per page (image, page, page contents)
        # before writing the catalog, which lists the page references.
        image_refs = []
        page_refs = []
        contents_refs = []
        for sequence in ims:
            # An image without n_frames is a single frame image.
            n_pages = getattr(sequence, "n_frames", 1) if save_all else 1
            for _ in range(n_pages):
                image_refs.append(existing_pdf.next_object_id(0))
                page_refs.append(existing_pdf.next_object_id(0))
                contents_refs.append(existing_pdf.next_object_id(0))
                existing_pdf.pages.append(page_refs[-1])

        #
        # catalog and list of pages
        existing_pdf.write_catalog()

        page_number = 0
        for sequence in ims:
            frames = ImageSequence.Iterator(sequence) if save_all else [sequence]
            for frame in frames:
                # FIXME: Should replace ASCIIHexDecode with RunLengthDecode
                # (packbits) or LZWDecode (tiff/lzw compression).  Note that
                # PDF 1.2 also supports FlateDecode (zip compression).

                bits = 8
                params = None
                decode = None

                if frame.mode == "1":
                    pdf_filter = "ASCIIHexDecode"
                    colorspace = PdfParser.PdfName("DeviceGray")
                    procset = "ImageB"  # grayscale
                    bits = 1
                elif frame.mode == "L":
                    pdf_filter = "DCTDecode"
                    colorspace = PdfParser.PdfName("DeviceGray")
                    procset = "ImageB"  # grayscale
                elif frame.mode == "P":
                    pdf_filter = "ASCIIHexDecode"
                    palette = frame.im.getpalette("RGB")
                    colorspace = [
                        PdfParser.PdfName("Indexed"),
                        PdfParser.PdfName("DeviceRGB"),
                        255,
                        PdfParser.PdfBinary(palette),
                    ]
                    procset = "ImageI"  # indexed color
                elif frame.mode == "RGB":
                    pdf_filter = "DCTDecode"
                    colorspace = PdfParser.PdfName("DeviceRGB")
                    procset = "ImageC"  # color images
                elif frame.mode == "CMYK":
                    pdf_filter = "DCTDecode"
                    colorspace = PdfParser.PdfName("DeviceCMYK")
                    procset = "ImageC"  # color images
                    decode = [1, 0, 1, 0, 1, 0, 1, 0]
                else:
                    raise ValueError(f"cannot save mode {frame.mode}")

                #
                # image

                op = io.BytesIO()

                if pdf_filter == "ASCIIHexDecode":
                    if bits == 1:
                        # FIXME: the hex encoder doesn't support packed 1-bit
                        # images; do things the hard way...
                        data = frame.tobytes("raw", "1")
                        frame = Image.new("L", frame.size)
                        frame.putdata(data)
                    ImageFile._save(
                        frame, op, [("hex", (0, 0) + frame.size, 0, frame.mode)]
                    )
                elif pdf_filter == "DCTDecode":
                    Image.SAVE["JPEG"](frame, op, filename)
                # No mode above selects the next two filters yet; the
                # branches are kept for the FIXME at the top of the loop.
                elif pdf_filter == "FlateDecode":
                    ImageFile._save(
                        frame, op, [("zip", (0, 0) + frame.size, 0, frame.mode)]
                    )
                elif pdf_filter == "RunLengthDecode":
                    ImageFile._save(
                        frame, op, [("packbits", (0, 0) + frame.size, 0, frame.mode)]
                    )
                else:
                    raise ValueError(f"unsupported PDF filter ({pdf_filter})")

                #
                # Get image characteristics

                width, height = frame.size

                # Page size: pixel dimensions scaled by 72.0 / resolution
                # and truncated to whole numbers.
                page_width = int(width * 72.0 / resolution)
                page_height = int(height * 72.0 / resolution)

                existing_pdf.write_obj(
                    image_refs[page_number],
                    stream=op.getvalue(),
                    Type=PdfParser.PdfName("XObject"),
                    Subtype=PdfParser.PdfName("Image"),
                    Width=width,  # pixel size, not scaled by resolution
                    Height=height,
                    Filter=PdfParser.PdfName(pdf_filter),
                    BitsPerComponent=bits,
                    Decode=decode,
                    DecodeParams=params,
                    ColorSpace=colorspace,
                )

                #
                # page

                existing_pdf.write_page(
                    page_refs[page_number],
                    Resources=PdfParser.PdfDict(
                        ProcSet=[
                            PdfParser.PdfName("PDF"),
                            PdfParser.PdfName(procset),
                        ],
                        XObject=PdfParser.PdfDict(image=image_refs[page_number]),
                    ),
                    MediaBox=[0, 0, page_width, page_height],
                    Contents=contents_refs[page_number],
                )

                #
                # page contents

                page_contents = b"q %d 0 0 %d 0 0 cm /image Do Q\n" % (
                    page_width,
                    page_height,
                )

                existing_pdf.write_obj(
                    contents_refs[page_number], stream=page_contents
                )

                page_number += 1

        #
        # trailer
        existing_pdf.write_xref_and_trailer()
        if hasattr(fp, "flush"):
            fp.flush()
    finally:
        existing_pdf.close()


#
# --------------------------------------------------------------------


# Register the PDF writer with the Image module: the single-image save
# handler, the multi-page ("save all") handler, the ".pdf" file extension
# and the "application/pdf" MIME type.
Image.register_save("PDF", _save)
Image.register_save_all("PDF", _save_all)

Image.register_extension("PDF", ".pdf")

Image.register_mime("PDF", "application/pdf")
Forking PIL 2010-07-31 06:52:47 +04:00			`#`
			`# The Python Imaging Library.`
			`# $Id$`
			`#`
			`# PDF (Acrobat) file handling`
			`#`
			`# History:`
			`# 1996-07-16 fl Created`
			`# 1997-01-18 fl Fixed header`
			`# 2004-02-21 fl Fixes for 1/L/CMYK images, etc.`
			`# 2004-02-24 fl Fixes for 1 and P images.`
			`#`
			`# Copyright (c) 1997-2004 by Secret Labs AB. All rights reserved.`
			`# Copyright (c) 1996-1997 by Fredrik Lundh.`
			`#`
			`# See the README file for information on usage and redistribution.`
			`#`

			`##`
			`# Image plugin for PDF images (output only).`
			`##`

py3k: Convert StringIO.StringIO to io.BytesIO io.BytesIO is already in 2.6. Some of the more obvious bytes literals are marked in this commit. 2012-10-17 07:01:19 +04:00			`import io`
Set PDF title to filename by default 2018-07-29 07:49:58 +03:00			`import os`
Added PDF creation and modification date info 2018-07-29 15:33:59 +03:00			`import time`
Forking PIL 2010-07-31 06:52:47 +04:00
Remove deprecated __version__ from plugins 2019-11-05 18:00:34 +03:00			`from . import Image, ImageFile, ImageSequence, PdfParser, __version__`
Forking PIL 2010-07-31 06:52:47 +04:00
			`#`
			`# --------------------------------------------------------------------`

			`# object ids:`
			`# 1. catalogue`
			`# 2. pages`
			`# 3. image`
			`# 4. page`
			`# 5. page contents`

pep8 and pyflakes 2014-05-12 15:56:55 +04:00
Added PDF multipage saving 2015-09-29 15:51:52 +03:00			`def _save_all(im, fp, filename):`
			`_save(im, fp, filename, save_all=True)`


Forking PIL 2010-07-31 06:52:47 +04:00			`##`
			`# (Internal) Image save plugin for the PDF format.`

Format with Black 2019-03-21 16:28:20 +03:00
Added PDF multipage saving 2015-09-29 15:51:52 +03:00			`def _save(im, fp, filename, save_all=False):`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`is_appending = im.encoderinfo.get("append", False)`
			`if is_appending:`
issue #2959: rename pdfParser.py to PdfParser.py 2018-01-31 02:35:55 +03:00			`existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode="r+b")`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`else:`
issue #2959: rename pdfParser.py to PdfParser.py 2018-01-31 02:35:55 +03:00			`existing_pdf = PdfParser.PdfParser(f=fp, filename=filename, mode="w+b")`
Forking PIL 2010-07-31 06:52:47 +04:00
Added PDF creation and modification date info 2018-07-29 15:33:59 +03:00			`resolution = im.encoderinfo.get("resolution", 72.0)`

			`info = {`
Format with Black 2019-03-21 16:28:20 +03:00			`"title": None`
			`if is_appending`
			`else os.path.splitext(os.path.basename(filename))[0],`
Added PDF creation and modification date info 2018-07-29 15:33:59 +03:00			`"author": None,`
			`"subject": None,`
			`"keywords": None,`
			`"creator": None,`
			`"producer": None,`
			`"creationDate": None if is_appending else time.gmtime(),`
Format with Black 2019-03-21 16:28:20 +03:00			`"modDate": None if is_appending else time.gmtime(),`
Added PDF creation and modification date info 2018-07-29 15:33:59 +03:00			`}`
			`for k, default in info.items():`
			`v = im.encoderinfo.get(k) if k in im.encoderinfo else default`
			`if v:`
			`existing_pdf.info[k[0].upper() + k[1:]] = v`
Forking PIL 2010-07-31 06:52:47 +04:00
			`#`
			`# make sure image data is available`
			`im.load()`

issue #2959: keep file open, add context manager, add methods to support writing, eliminate the passing of file or buffer 2018-01-26 21:07:43 +03:00			`existing_pdf.start_writing()`
			`existing_pdf.write_header()`
Upgrade Python syntax for 3.6+ Co-authored-by: nulano <nulano@nulano.eu> 2020-07-16 12:43:29 +03:00			`existing_pdf.write_comment(f"created by Pillow {__version__} PDF driver")`
Forking PIL 2010-07-31 06:52:47 +04:00
			`#`
			`# pages`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`ims = [im]`
Added PDF multipage saving 2015-09-29 15:51:52 +03:00			`if save_all:`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`append_images = im.encoderinfo.get("append_images", [])`
			`for append_im in append_images:`
Fix #2804 : sets encoderinfo for images when saving multi-page PDF 2017-10-19 14:30:34 +03:00			`append_im.encoderinfo = im.encoderinfo.copy()`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`ims.append(append_im)`
			`numberOfPages = 0`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`image_refs = []`
			`page_refs = []`
			`contents_refs = []`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`for im in ims:`
			`im_numberOfPages = 1`
			`if save_all:`
			`try:`
			`im_numberOfPages = im.n_frames`
			`except AttributeError:`
Line too long 2018-06-24 15:32:25 +03:00			`# Image format does not have n_frames.`
			`# It is a single frame image`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`pass`
			`numberOfPages += im_numberOfPages`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`for i in range(im_numberOfPages):`
			`image_refs.append(existing_pdf.next_object_id(0))`
			`page_refs.append(existing_pdf.next_object_id(0))`
			`contents_refs.append(existing_pdf.next_object_id(0))`
			`existing_pdf.pages.append(page_refs[-1])`
Added PDF multipage saving 2015-09-29 15:51:52 +03:00
issue #2959: add tests and fixes, text encoding, remove remnants of text writing from PdfImagePlugin 2018-01-24 04:28:39 +03:00			`#`
			`# catalog and list of pages`
issue #2959: keep file open, add context manager, add methods to support writing, eliminate the passing of file or buffer 2018-01-26 21:07:43 +03:00			`existing_pdf.write_catalog()`
Forking PIL 2010-07-31 06:52:47 +04:00
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`pageNumber = 0`
			`for imSequence in ims:`
Fixed saving a multiframe image as a single frame PDF 2018-05-18 15:15:45 +03:00			`im_pages = ImageSequence.Iterator(imSequence) if save_all else [imSequence]`
			`for im in im_pages:`
Line too long 2018-06-24 15:32:25 +03:00			`# FIXME: Should replace ASCIIHexDecode with RunLengthDecode`
			`# (packbits) or LZWDecode (tiff/lzw compression). Note that`
			`# PDF 1.2 also supports Flatedecode (zip compression).`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00
			`bits = 8`
			`params = None`
Merge branch 'master' into rm-3.5 2020-09-01 20:16:46 +03:00			`decode = None`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00
			`if im.mode == "1":`
			`filter = "ASCIIHexDecode"`
issue #2959: rename pdfParser.py to PdfParser.py 2018-01-31 02:35:55 +03:00			`colorspace = PdfParser.PdfName("DeviceGray")`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`procset = "ImageB" # grayscale`
			`bits = 1`
			`elif im.mode == "L":`
			`filter = "DCTDecode"`
Upgrade Python syntax for 3.6+ Co-authored-by: nulano <nulano@nulano.eu> 2020-07-16 12:43:29 +03:00			`# params = f"<< /Predictor 15 /Columns {width-2} >>"`
issue #2959: rename pdfParser.py to PdfParser.py 2018-01-31 02:35:55 +03:00			`colorspace = PdfParser.PdfName("DeviceGray")`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`procset = "ImageB" # grayscale`
			`elif im.mode == "P":`
			`filter = "ASCIIHexDecode"`
			`palette = im.im.getpalette("RGB")`
Line too long 2018-06-24 15:32:25 +03:00			`colorspace = [`
			`PdfParser.PdfName("Indexed"),`
			`PdfParser.PdfName("DeviceRGB"),`
			`255,`
Format with Black 2019-03-21 16:28:20 +03:00			`PdfParser.PdfBinary(palette),`
Line too long 2018-06-24 15:32:25 +03:00			`]`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`procset = "ImageI" # indexed color`
			`elif im.mode == "RGB":`
			`filter = "DCTDecode"`
issue #2959: rename pdfParser.py to PdfParser.py 2018-01-31 02:35:55 +03:00			`colorspace = PdfParser.PdfName("DeviceRGB")`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`procset = "ImageC" # color images`
			`elif im.mode == "CMYK":`
			`filter = "DCTDecode"`
issue #2959: rename pdfParser.py to PdfParser.py 2018-01-31 02:35:55 +03:00			`colorspace = PdfParser.PdfName("DeviceCMYK")`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`procset = "ImageC" # color images`
Merge branch 'master' into rm-3.5 2020-09-01 20:16:46 +03:00			`decode = [1, 0, 1, 0, 1, 0, 1, 0]`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`else:`
Upgrade Python syntax for 3.6+ Co-authored-by: nulano <nulano@nulano.eu> 2020-07-16 12:43:29 +03:00			`raise ValueError(f"cannot save mode {im.mode}")`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`#`
			`# image`

			`op = io.BytesIO()`

issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`if filter == "ASCIIHexDecode":`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`if bits == 1:`
			`# FIXME: the hex encoder doesn't support packed 1-bit`
			`# images; do things the hard way...`
			`data = im.tobytes("raw", "1")`
Fixed dimensions of 1-bit PDFs 2019-04-30 08:31:47 +03:00			`im = Image.new("L", im.size)`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`im.putdata(data)`
Format with Black 2019-03-21 16:28:20 +03:00			`ImageFile._save(im, op, [("hex", (0, 0) + im.size, 0, im.mode)])`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`elif filter == "DCTDecode":`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`Image.SAVE["JPEG"](im, op, filename)`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`elif filter == "FlateDecode":`
Format with Black 2019-03-21 16:28:20 +03:00			`ImageFile._save(im, op, [("zip", (0, 0) + im.size, 0, im.mode)])`
issue #2959: support appending to existing PDFs 2018-01-18 16:33:11 +03:00			`elif filter == "RunLengthDecode":`
Format with Black 2019-03-21 16:28:20 +03:00			`ImageFile._save(im, op, [("packbits", (0, 0) + im.size, 0, im.mode)])`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00			`else:`
Upgrade Python syntax for 3.6+ Co-authored-by: nulano <nulano@nulano.eu> 2020-07-16 12:43:29 +03:00			`raise ValueError(f"unsupported PDF filter ({filter})")`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00
			`#`
			`# Get image characteristics`

			`width, height = im.size`

Format with Black 2019-03-21 16:28:20 +03:00			`existing_pdf.write_obj(`
			`image_refs[pageNumber],`
			`stream=op.getvalue(),`
			`Type=PdfParser.PdfName("XObject"),`
			`Subtype=PdfParser.PdfName("Image"),`
			`Width=width, # * 72.0 / resolution,`
			`Height=height, # * 72.0 / resolution,`
			`Filter=PdfParser.PdfName(filter),`
			`BitsPerComponent=bits,`
Merge branch 'master' into rm-3.5 2020-09-01 20:16:46 +03:00			`Decode=decode,`
Format with Black 2019-03-21 16:28:20 +03:00			`DecodeParams=params,`
			`ColorSpace=colorspace,`
			`)`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00
			`#`
			`# page`

Format with Black 2019-03-21 16:28:20 +03:00			`existing_pdf.write_page(`
			`page_refs[pageNumber],`
			`Resources=PdfParser.PdfDict(`
			`ProcSet=[PdfParser.PdfName("PDF"), PdfParser.PdfName(procset)],`
			`XObject=PdfParser.PdfDict(image=image_refs[pageNumber]),`
			`),`
			`MediaBox=[`
			`0,`
			`0,`
			`int(width * 72.0 / resolution),`
			`int(height * 72.0 / resolution),`
			`],`
			`Contents=contents_refs[pageNumber],`
			`)`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00
			`#`
			`# page contents`

Remove unnecessary make_bytes() function The function was introduced in 65112bad7e4a692ea01980a642e540ec4c0a2fcb to handle the differences between Python 2 & 3 byte handling. Now that Python 3 supports byte formatting, can drop the unnecessary compatibility shim in favor of native features. 2020-01-27 01:46:27 +03:00			`page_contents = b"q %d 0 0 %d 0 0 cm /image Do Q\n" % (`
			`int(width * 72.0 / resolution),`
			`int(height * 72.0 / resolution),`
Format with Black 2019-03-21 16:28:20 +03:00			`)`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00
Format with Black 2019-03-21 16:28:20 +03:00			`existing_pdf.write_obj(contents_refs[pageNumber], stream=page_contents)`
Added append_images to PDF saving 2017-05-13 07:26:52 +03:00
			`pageNumber += 1`
Forking PIL 2010-07-31 06:52:47 +04:00
			`#`
			`# trailer`
issue #2959: keep file open, add context manager, add methods to support writing, eliminate the passing of file or buffer 2018-01-26 21:07:43 +03:00			`existing_pdf.write_xref_and_trailer()`
Improved consistency of checks for flush 2015-09-02 16:48:22 +03:00			`if hasattr(fp, "flush"):`
			`fp.flush()`
issue #2959: keep file open, add context manager, add methods to support writing, eliminate the passing of file or buffer 2018-01-26 21:07:43 +03:00			`existing_pdf.close()`
Forking PIL 2010-07-31 06:52:47 +04:00
Format with Black 2019-03-21 16:28:20 +03:00
Forking PIL 2010-07-31 06:52:47 +04:00			`#`
			`# --------------------------------------------------------------------`

Flake8 blank line fixes 2018-03-03 12:54:00 +03:00
Forking PIL 2010-07-31 06:52:47 +04:00			`Image.register_save("PDF", _save)`
Added PDF multipage saving 2015-09-29 15:51:52 +03:00			`Image.register_save_all("PDF", _save_all)`
Forking PIL 2010-07-31 06:52:47 +04:00
			`Image.register_extension("PDF", ".pdf")`

			`Image.register_mime("PDF", "application/pdf")`