# Pillow/Tests/test_file_pdf.py
# Tests for saving images as PDF and for reading the result back with PdfParser.

import io
import os
2014-06-10 13:10:47 +04:00
import os.path
import tempfile
import time
2014-05-11 10:01:09 +04:00
import pytest
from PIL import Image, PdfParser
from .helper import PillowTestCase, hopper
2014-05-11 10:01:09 +04:00
2014-06-10 13:10:47 +04:00
class TestFilePdf(PillowTestCase):
    """Tests for saving images as PDF and reading them back with ``PdfParser``."""

    def helper_save_as_pdf(self, mode, **kwargs):
        """Save the ``hopper`` image in ``mode`` as a PDF and sanity-check the file.

        :param mode: image mode passed to ``hopper()``; it is also embedded in
            the temporary file name.
        :param kwargs: extra keyword arguments forwarded to ``im.save()``.
        :returns: path of the PDF file that was written.
        """
        # Arrange
        im = hopper(mode)
        outfile = self.tempfile("temp_" + mode + ".pdf")

        # Act
        im.save(outfile, **kwargs)

        # Assert
        assert os.path.isfile(outfile)
        assert os.path.getsize(outfile) > 0
        with PdfParser.PdfParser(outfile) as pdf:
            # Appending (to a file or with extra images) must yield several pages.
            if kwargs.get("append_images") or kwargs.get("append"):
                assert len(pdf.pages) > 1
            else:
                assert len(pdf.pages) > 0

        with open(outfile, "rb") as fp:
            contents = fp.read()

        # The first MediaBox written to the file must match the image size.
        # Check for the marker first so that a missing MediaBox is reported as
        # an assertion failure instead of an IndexError from the split below.
        marker = b"/MediaBox [ 0 0 "
        assert marker in contents
        media_box = contents.split(marker)[1].split(b"]")[0]
        size = tuple(int(d) for d in media_box.split())
        assert im.size == size

        return outfile

    def test_monochrome(self):
        # Arrange
        mode = "1"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_greyscale(self):
        # Arrange
        mode = "L"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_rgb(self):
        # Arrange
        mode = "RGB"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_p_mode(self):
        # Arrange
        mode = "P"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_cmyk_mode(self):
        # Arrange
        mode = "CMYK"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_unsupported_mode(self):
        # Saving an "LA" image as PDF is expected to be rejected.
        im = hopper("LA")
        outfile = self.tempfile("temp_LA.pdf")

        with pytest.raises(ValueError):
            im.save(outfile)

    def test_save_all(self):
        # Single frame image
        self.helper_save_as_pdf("RGB", save_all=True)

        # Multiframe image
        with Image.open("Tests/images/dispose_bgnd.gif") as im:
            outfile = self.tempfile("temp.pdf")
            im.save(outfile, save_all=True)

            assert os.path.isfile(outfile)
            assert os.path.getsize(outfile) > 0

            # Append images
            ims = [hopper()]
            im.copy().save(outfile, save_all=True, append_images=ims)

            assert os.path.isfile(outfile)
            assert os.path.getsize(outfile) > 0

            # Test appending using a generator
            def im_generator(ims):
                yield from ims

            im.save(outfile, save_all=True, append_images=im_generator(ims))

        assert os.path.isfile(outfile)
        assert os.path.getsize(outfile) > 0

        # Append JPEG images
        with Image.open("Tests/images/flower.jpg") as jpeg:
            jpeg.save(outfile, save_all=True, append_images=[jpeg.copy()])

        assert os.path.isfile(outfile)
        assert os.path.getsize(outfile) > 0

    def test_multiframe_normal_save(self):
        # Test saving a multiframe image without save_all
        with Image.open("Tests/images/dispose_bgnd.gif") as im:
            outfile = self.tempfile("temp.pdf")
            im.save(outfile)

        assert os.path.isfile(outfile)
        assert os.path.getsize(outfile) > 0

    def test_pdf_open(self):
        # fail on a buffer full of null bytes
        with pytest.raises(PdfParser.PdfFormatError):
            PdfParser.PdfParser(buf=bytearray(65536))

        # make an empty PDF object
        with PdfParser.PdfParser() as empty_pdf:
            assert len(empty_pdf.pages) == 0
            assert len(empty_pdf.info) == 0
            assert not empty_pdf.should_close_buf
            assert not empty_pdf.should_close_file

        # make a PDF file
        pdf_filename = self.helper_save_as_pdf("RGB")

        # open the PDF file
        with PdfParser.PdfParser(filename=pdf_filename) as hopper_pdf:
            assert len(hopper_pdf.pages) == 1
            assert hopper_pdf.should_close_buf
            assert hopper_pdf.should_close_file

        # read a PDF file from a buffer with a non-zero offset
        with open(pdf_filename, "rb") as f:
            content = b"xyzzy" + f.read()
        with PdfParser.PdfParser(buf=content, start_offset=5) as hopper_pdf:
            assert len(hopper_pdf.pages) == 1
            assert not hopper_pdf.should_close_buf
            assert not hopper_pdf.should_close_file

        # read a PDF file from an already open file
        with open(pdf_filename, "rb") as f:
            with PdfParser.PdfParser(f=f) as hopper_pdf:
                assert len(hopper_pdf.pages) == 1
                assert hopper_pdf.should_close_buf
                assert not hopper_pdf.should_close_file

    def test_pdf_append_fails_on_nonexistent_file(self):
        # Appending needs an existing file to append to.
        im = hopper("RGB")
        with tempfile.TemporaryDirectory() as temp_dir:
            # OSError is the canonical name; IOError is its alias on Python 3.
            with pytest.raises(OSError):
                im.save(os.path.join(temp_dir, "nonexistent.pdf"), append=True)

    def check_pdf_pages_consistency(self, pdf):
        """Assert that the page tree of ``pdf`` is internally consistent.

        Checks that the root pages object has ``Kids`` but no ``Parent``, that
        following the ``Parent`` links from every page in ``pdf.pages`` ends at
        ``pdf.pages_ref``, and that every entry of the root ``Kids`` list is
        visited on the way.

        :param pdf: an open ``PdfParser.PdfParser`` instance.
        """
        pages_info = pdf.read_indirect(pdf.pages_ref)
        assert b"Parent" not in pages_info
        assert b"Kids" in pages_info

        # Work on a copy: entries are removed as they are visited, and the
        # structure returned by the parser should not be modified by a check.
        kids_not_used = list(pages_info[b"Kids"])

        for page_ref in pdf.pages:
            # Walk up the Parent chain until the root pages object is reached.
            while True:
                if page_ref in kids_not_used:
                    kids_not_used.remove(page_ref)
                page_info = pdf.read_indirect(page_ref)
                assert b"Parent" in page_info
                page_ref = page_info[b"Parent"]
                if page_ref == pdf.pages_ref:
                    break
            assert pdf.pages_ref == page_info[b"Parent"]

        # Every direct child of the root must have been reached from some page.
        assert kids_not_used == []

    def test_pdf_append(self):
        # make a PDF file
        pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")

        # open it, check pages and info
        with PdfParser.PdfParser(pdf_filename, mode="r+b") as pdf:
            assert len(pdf.pages) == 1
            assert len(pdf.info) == 4
            expected_title = os.path.splitext(os.path.basename(pdf_filename))[0]
            assert pdf.info.Title == expected_title
            assert pdf.info.Producer == "PdfParser"
            assert b"CreationDate" in pdf.info
            assert b"ModDate" in pdf.info
            self.check_pdf_pages_consistency(pdf)

            # append some info
            pdf.info.Title = "abc"
            pdf.info.Author = "def"
            pdf.info.Subject = "ghi\uABCD"
            pdf.info.Keywords = "qw)e\\r(ty"
            pdf.info.Creator = "hopper()"
            pdf.start_writing()
            pdf.write_xref_and_trailer()

        # open it again, check pages and info again
        with PdfParser.PdfParser(pdf_filename) as pdf:
            assert len(pdf.pages) == 1
            # 4 original entries plus Author, Subject, Keywords and Creator
            assert len(pdf.info) == 8
            assert pdf.info.Title == "abc"
            assert b"CreationDate" in pdf.info
            assert b"ModDate" in pdf.info
            self.check_pdf_pages_consistency(pdf)

        # append two images
        mode_CMYK = hopper("CMYK")
        mode_P = hopper("P")
        mode_CMYK.save(pdf_filename, append=True, save_all=True, append_images=[mode_P])

        # open the PDF again, check pages and info again
        with PdfParser.PdfParser(pdf_filename) as pdf:
            assert len(pdf.pages) == 3
            assert len(pdf.info) == 8
            assert PdfParser.decode_text(pdf.info[b"Title"]) == "abc"
            assert pdf.info.Title == "abc"
            assert pdf.info.Producer == "PdfParser"
            assert pdf.info.Keywords == "qw)e\\r(ty"
            assert pdf.info.Subject == "ghi\uABCD"
            assert b"CreationDate" in pdf.info
            assert b"ModDate" in pdf.info
            self.check_pdf_pages_consistency(pdf)

    def test_pdf_info(self):
        # make a PDF file
        pdf_filename = self.helper_save_as_pdf(
            "RGB",
            title="title",
            author="author",
            subject="subject",
            keywords="keywords",
            creator="creator",
            producer="producer",
            creationDate=time.strptime("2000", "%Y"),
            modDate=time.strptime("2001", "%Y"),
        )

        # open it, check pages and info
        with PdfParser.PdfParser(pdf_filename) as pdf:
            assert len(pdf.info) == 8
            assert pdf.info.Title == "title"
            assert pdf.info.Author == "author"
            assert pdf.info.Subject == "subject"
            assert pdf.info.Keywords == "keywords"
            assert pdf.info.Creator == "creator"
            assert pdf.info.Producer == "producer"
            assert pdf.info.CreationDate == time.strptime("2000", "%Y")
            assert pdf.info.ModDate == time.strptime("2001", "%Y")
            self.check_pdf_pages_consistency(pdf)

    def test_pdf_append_to_bytesio(self):
        # write a first PDF into an in-memory buffer
        im = hopper("RGB")
        f = io.BytesIO()
        im.save(f, format="PDF")
        initial_size = len(f.getvalue())
        assert initial_size > 0

        # append a second image to a fresh buffer holding the same bytes
        im = hopper("P")
        f = io.BytesIO(f.getvalue())
        im.save(f, format="PDF", append=True)
        assert len(f.getvalue()) > initial_size