2018-01-25 02:44:59 +03:00
|
|
|
import io
|
2018-01-24 04:28:39 +03:00
|
|
|
import os
|
2014-06-10 13:10:47 +04:00
|
|
|
import os.path
|
2018-01-24 04:28:39 +03:00
|
|
|
import tempfile
|
2018-07-29 15:33:59 +03:00
|
|
|
import time
|
2014-05-11 10:01:09 +04:00
|
|
|
|
2019-07-06 23:40:53 +03:00
|
|
|
from PIL import Image, PdfParser
|
|
|
|
|
|
|
|
from .helper import PillowTestCase, hopper
|
|
|
|
|
2014-05-11 10:01:09 +04:00
|
|
|
|
2014-06-10 13:10:47 +04:00
|
|
|
class TestFilePdf(PillowTestCase):
    """Tests for saving images as PDF and for reading them back with PdfParser."""

    def helper_save_as_pdf(self, mode, **kwargs):
        """Save the hopper test image in ``mode`` as a PDF and sanity-check it.

        Extra keyword arguments are forwarded unchanged to ``Image.save``.

        Checks that the output file exists and is non-empty, that it holds
        at least one page (more than one when ``append_images`` or ``append``
        is passed with a truthy value), and that the first ``/MediaBox``
        found in the file matches the image size.

        Returns the path of the PDF that was written.
        """
        # Arrange
        im = hopper(mode)
        outfile = self.tempfile("temp_" + mode + ".pdf")

        # Act
        im.save(outfile, **kwargs)

        # Assert
        self.assertTrue(os.path.isfile(outfile))
        self.assertGreater(os.path.getsize(outfile), 0)
        with PdfParser.PdfParser(outfile) as pdf:
            if kwargs.get("append_images", False) or kwargs.get("append", False):
                self.assertGreater(len(pdf.pages), 1)
            else:
                self.assertGreater(len(pdf.pages), 0)
        with open(outfile, "rb") as fp:
            contents = fp.read()
        # Pull the width and height out of the first "/MediaBox [ 0 0 w h ]".
        media_box = contents.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0]
        size = tuple(int(d) for d in media_box.split())
        self.assertEqual(im.size, size)

        return outfile

    def test_monochrome(self):
        """Mode "1" images can be saved as PDF."""
        # Arrange
        mode = "1"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_greyscale(self):
        """Mode "L" images can be saved as PDF."""
        # Arrange
        mode = "L"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_rgb(self):
        """Mode "RGB" images can be saved as PDF."""
        # Arrange
        mode = "RGB"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_p_mode(self):
        """Mode "P" images can be saved as PDF."""
        # Arrange
        mode = "P"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_cmyk_mode(self):
        """Mode "CMYK" images can be saved as PDF."""
        # Arrange
        mode = "CMYK"

        # Act / Assert
        self.helper_save_as_pdf(mode)

    def test_unsupported_mode(self):
        """Saving a mode "LA" image as PDF raises ValueError."""
        im = hopper("LA")
        outfile = self.tempfile("temp_LA.pdf")

        with self.assertRaises(ValueError):
            im.save(outfile)

    def test_save_all(self):
        """``save_all=True`` works for single-frame, multiframe and appended images."""
        # Single frame image
        self.helper_save_as_pdf("RGB", save_all=True)

        # Multiframe image
        outfile = self.tempfile("temp.pdf")
        # Open inside a ``with`` block so the file handle is released even
        # if one of the assertions below fails.
        with Image.open("Tests/images/dispose_bgnd.gif") as im:
            im.save(outfile, save_all=True)

            self.assertTrue(os.path.isfile(outfile))
            self.assertGreater(os.path.getsize(outfile), 0)

            # Append images
            ims = [hopper()]
            im.copy().save(outfile, save_all=True, append_images=ims)

            self.assertTrue(os.path.isfile(outfile))
            self.assertGreater(os.path.getsize(outfile), 0)

            # Test appending using a generator
            def im_generator(ims):
                yield from ims

            im.save(outfile, save_all=True, append_images=im_generator(ims))

        self.assertTrue(os.path.isfile(outfile))
        self.assertGreater(os.path.getsize(outfile), 0)

        # Append JPEG images
        with Image.open("Tests/images/flower.jpg") as jpeg:
            jpeg.save(outfile, save_all=True, append_images=[jpeg.copy()])

        self.assertTrue(os.path.isfile(outfile))
        self.assertGreater(os.path.getsize(outfile), 0)

    def test_multiframe_normal_save(self):
        """A multiframe image can be saved as PDF without ``save_all``."""
        # Test saving a multiframe image without save_all
        outfile = self.tempfile("temp.pdf")
        # Context manager closes the underlying GIF file once it is saved.
        with Image.open("Tests/images/dispose_bgnd.gif") as im:
            im.save(outfile)

        self.assertTrue(os.path.isfile(outfile))
        self.assertGreater(os.path.getsize(outfile), 0)

    def test_pdf_open(self):
        """PdfParser can be built empty, from a filename, a buffer or an open file.

        Also checks the ``should_close_buf`` / ``should_close_file`` flags
        for each way of constructing the parser.
        """
        # fail on a buffer full of null bytes
        with self.assertRaises(PdfParser.PdfFormatError):
            PdfParser.PdfParser(buf=bytearray(65536))

        # make an empty PDF object
        with PdfParser.PdfParser() as empty_pdf:
            self.assertEqual(len(empty_pdf.pages), 0)
            self.assertEqual(len(empty_pdf.info), 0)
            self.assertFalse(empty_pdf.should_close_buf)
            self.assertFalse(empty_pdf.should_close_file)

        # make a PDF file
        pdf_filename = self.helper_save_as_pdf("RGB")

        # open the PDF file
        with PdfParser.PdfParser(filename=pdf_filename) as hopper_pdf:
            self.assertEqual(len(hopper_pdf.pages), 1)
            self.assertTrue(hopper_pdf.should_close_buf)
            self.assertTrue(hopper_pdf.should_close_file)

        # read a PDF file from a buffer with a non-zero offset
        with open(pdf_filename, "rb") as f:
            content = b"xyzzy" + f.read()
        with PdfParser.PdfParser(buf=content, start_offset=5) as hopper_pdf:
            self.assertEqual(len(hopper_pdf.pages), 1)
            self.assertFalse(hopper_pdf.should_close_buf)
            self.assertFalse(hopper_pdf.should_close_file)

        # read a PDF file from an already open file
        with open(pdf_filename, "rb") as f:
            with PdfParser.PdfParser(f=f) as hopper_pdf:
                self.assertEqual(len(hopper_pdf.pages), 1)
                self.assertTrue(hopper_pdf.should_close_buf)
                self.assertFalse(hopper_pdf.should_close_file)

    def test_pdf_append_fails_on_nonexistent_file(self):
        """Saving with ``append=True`` to a path that does not exist raises."""
        im = hopper("RGB")
        with tempfile.TemporaryDirectory() as temp_dir:
            missing = os.path.join(temp_dir, "nonexistent.pdf")
            # IOError is an alias of OSError on Python 3.
            with self.assertRaises(OSError):
                im.save(missing, append=True)

    def check_pdf_pages_consistency(self, pdf):
        """Assert that the page tree of ``pdf`` is internally consistent.

        The root pages object must have ``Kids`` and no ``Parent``. Every
        page in ``pdf.pages`` must reach ``pdf.pages_ref`` by following
        ``Parent`` links, and every entry of the root ``Kids`` array must
        be visited on one of those walks.
        """
        pages_info = pdf.read_indirect(pdf.pages_ref)
        self.assertNotIn(b"Parent", pages_info)
        self.assertIn(b"Kids", pages_info)
        # Work on a copy so the bookkeeping below does not edit the object
        # returned by read_indirect.
        kids_not_used = list(pages_info[b"Kids"])
        for page_ref in pdf.pages:
            # Climb from the page towards the root, ticking off every
            # root-level kid met on the way.
            while True:
                if page_ref in kids_not_used:
                    kids_not_used.remove(page_ref)
                page_info = pdf.read_indirect(page_ref)
                self.assertIn(b"Parent", page_info)
                page_ref = page_info[b"Parent"]
                if page_ref == pdf.pages_ref:
                    break
            self.assertEqual(pdf.pages_ref, page_info[b"Parent"])
        self.assertEqual(kids_not_used, [])

    def test_pdf_append(self):
        """Info entries and extra pages can be appended to an existing PDF."""
        # make a PDF file
        pdf_filename = self.helper_save_as_pdf("RGB", producer="PdfParser")

        # open it, check pages and info
        with PdfParser.PdfParser(pdf_filename, mode="r+b") as pdf:
            self.assertEqual(len(pdf.pages), 1)
            self.assertEqual(len(pdf.info), 4)
            self.assertEqual(
                pdf.info.Title, os.path.splitext(os.path.basename(pdf_filename))[0]
            )
            self.assertEqual(pdf.info.Producer, "PdfParser")
            self.assertIn(b"CreationDate", pdf.info)
            self.assertIn(b"ModDate", pdf.info)
            self.check_pdf_pages_consistency(pdf)

            # append some info
            pdf.info.Title = "abc"
            pdf.info.Author = "def"
            pdf.info.Subject = "ghi\uABCD"
            pdf.info.Keywords = "qw)e\\r(ty"
            pdf.info.Creator = "hopper()"
            pdf.start_writing()
            pdf.write_xref_and_trailer()

        # open it again, check pages and info again
        with PdfParser.PdfParser(pdf_filename) as pdf:
            self.assertEqual(len(pdf.pages), 1)
            self.assertEqual(len(pdf.info), 8)
            self.assertEqual(pdf.info.Title, "abc")
            self.assertIn(b"CreationDate", pdf.info)
            self.assertIn(b"ModDate", pdf.info)
            self.check_pdf_pages_consistency(pdf)

        # append two images
        mode_CMYK = hopper("CMYK")
        mode_P = hopper("P")
        mode_CMYK.save(pdf_filename, append=True, save_all=True, append_images=[mode_P])

        # open the PDF again, check pages and info again
        with PdfParser.PdfParser(pdf_filename) as pdf:
            self.assertEqual(len(pdf.pages), 3)
            self.assertEqual(len(pdf.info), 8)
            self.assertEqual(PdfParser.decode_text(pdf.info[b"Title"]), "abc")
            self.assertEqual(pdf.info.Title, "abc")
            self.assertEqual(pdf.info.Producer, "PdfParser")
            self.assertEqual(pdf.info.Keywords, "qw)e\\r(ty")
            self.assertEqual(pdf.info.Subject, "ghi\uABCD")
            self.assertIn(b"CreationDate", pdf.info)
            self.assertIn(b"ModDate", pdf.info)
            self.check_pdf_pages_consistency(pdf)

    def test_pdf_info(self):
        """Info keyword arguments given to ``save`` are readable via PdfParser."""
        # make a PDF file
        pdf_filename = self.helper_save_as_pdf(
            "RGB",
            title="title",
            author="author",
            subject="subject",
            keywords="keywords",
            creator="creator",
            producer="producer",
            creationDate=time.strptime("2000", "%Y"),
            modDate=time.strptime("2001", "%Y"),
        )

        # open it, check pages and info
        with PdfParser.PdfParser(pdf_filename) as pdf:
            self.assertEqual(len(pdf.info), 8)
            self.assertEqual(pdf.info.Title, "title")
            self.assertEqual(pdf.info.Author, "author")
            self.assertEqual(pdf.info.Subject, "subject")
            self.assertEqual(pdf.info.Keywords, "keywords")
            self.assertEqual(pdf.info.Creator, "creator")
            self.assertEqual(pdf.info.Producer, "producer")
            self.assertEqual(pdf.info.CreationDate, time.strptime("2000", "%Y"))
            self.assertEqual(pdf.info.ModDate, time.strptime("2001", "%Y"))
            self.check_pdf_pages_consistency(pdf)

    def test_pdf_append_to_bytesio(self):
        """Appending a page to a PDF held in a BytesIO makes the buffer grow."""
        im = hopper("RGB")
        f = io.BytesIO()
        im.save(f, format="PDF")
        initial_size = len(f.getvalue())
        self.assertGreater(initial_size, 0)
        im = hopper("P")
        # Start a fresh buffer pre-filled with the first PDF.
        f = io.BytesIO(f.getvalue())
        im.save(f, format="PDF", append=True)
        self.assertGreater(len(f.getvalue()), initial_size)
|