Pillow/Tests/test_file_pdf.py

298 lines
9.0 KiB
Python
Raw Normal View History

import io
import os
2014-06-10 13:10:47 +04:00
import os.path
import tempfile
import time
2014-05-11 10:01:09 +04:00
import pytest
from PIL import Image, PdfParser, features
2021-04-10 00:33:21 +03:00
from .helper import hopper, mark_if_feature_version
2014-05-11 10:01:09 +04:00
2020-03-22 22:54:54 +03:00
def helper_save_as_pdf(tmp_path, mode, **kwargs):
    """Save ``hopper(mode)`` as a PDF under *tmp_path* and sanity-check it.

    The file is written to ``tmp_path / "temp_<mode>.pdf"`` with *kwargs*
    forwarded to ``Image.save``.  The helper then asserts that the file
    exists and is non-empty, that it holds more than one page when
    ``append_images`` or ``append`` was passed (truthy) and at least one
    page otherwise, and that the first ``/MediaBox`` found in the raw bytes
    matches the image size.

    :param tmp_path: directory object supporting the ``/`` operator.
    :param mode: image mode string passed to ``hopper``.
    :param kwargs: extra keyword arguments for ``Image.save``.
    :returns: the path of the written PDF, as a string.
    """
    # Arrange
    image = hopper(mode)
    outfile = str(tmp_path / ("temp_" + mode + ".pdf"))

    # Act
    image.save(outfile, **kwargs)

    # Assert
    assert os.path.isfile(outfile)
    assert os.path.getsize(outfile) > 0

    multi_page_expected = kwargs.get("append_images", False) or kwargs.get(
        "append", False
    )
    minimum_exceeded = 1 if multi_page_expected else 0
    with PdfParser.PdfParser(outfile) as pdf:
        assert len(pdf.pages) > minimum_exceeded

    with open(outfile, "rb") as fp:
        raw = fp.read()
    # Text between "/MediaBox [ 0 0 " and the closing "]" is "<width> <height>".
    media_box = raw.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0]
    size = tuple(map(float, media_box.split()))
    assert image.size == size

    return outfile
2014-05-11 18:35:49 +04:00
2022-10-03 08:57:42 +03:00
@pytest.mark.parametrize("mode", ("L", "P", "RGB", "CMYK"))
def test_save(tmp_path, mode):
    """Saving as PDF passes the shared helper's checks for each listed mode."""
    helper_save_as_pdf(tmp_path=tmp_path, mode=mode)
2022-08-23 02:16:40 +03:00
@pytest.mark.valgrind_known_error(reason="Temporary skip")
def test_monochrome(tmp_path):
    """A mode "1" image saves as a PDF that stays under a size ceiling."""
    # Act
    outfile = helper_save_as_pdf(tmp_path, "1")

    # Assert
    # The ceiling is tighter when libtiff is available (presumably a more
    # compact encoding is used then -- confirm against the PDF plugin).
    if features.check("libtiff"):
        size_limit = 5000
    else:
        size_limit = 15000
    assert os.path.getsize(outfile) < size_limit
2014-05-11 18:16:13 +04:00
2020-03-22 22:54:54 +03:00
def test_unsupported_mode(tmp_path):
    """Saving an "LA" image as PDF raises ``ValueError``."""
    outfile = str(tmp_path / "temp_LA.pdf")
    la_image = hopper("LA")

    with pytest.raises(ValueError):
        la_image.save(outfile)
2021-01-07 16:57:49 +03:00
def test_resolution(tmp_path):
    """``resolution=150`` scales both the drawing matrix and the MediaBox."""
    outfile = str(tmp_path / "temp.pdf")
    hopper().save(outfile, resolution=150)

    with open(outfile, "rb") as fp:
        raw = fp.read()

    # NOTE(review): 61.44 is presumably 128 px * 72 / 150 for the hopper
    # image -- confirm the image dimensions.
    expected = (61.44, 61.44)

    # Scale factors from the content stream: "q <w> 0 0 <h> 0 0 cm".
    matrix = raw.split(b"stream\nq ")[1].split(b" 0 0 cm")[0]
    scale = tuple(float(part) for part in matrix.split(b" 0 0 "))
    assert scale == expected

    # Page dimensions from "/MediaBox [ 0 0 <w> <h> ]".
    media_box = raw.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0]
    page_size = tuple(float(part) for part in media_box.split())
    assert page_size == expected
2021-04-10 17:58:01 +03:00
@mark_if_feature_version(
    pytest.mark.valgrind_known_error, "libjpeg_turbo", "2.0", reason="Known Failing"
)
def test_save_all(tmp_path):
    """``save_all=True`` writes a non-empty PDF for several kinds of input.

    Covers a single-frame image, a multiframe GIF, images appended from a
    list and from a generator, and an appended JPEG.
    """
    outfile = str(tmp_path / "temp.pdf")

    def assert_written():
        # The output file must exist and contain data.
        assert os.path.isfile(outfile)
        assert os.path.getsize(outfile) > 0

    # Single frame image
    helper_save_as_pdf(tmp_path, "RGB", save_all=True)

    # Multiframe image
    with Image.open("Tests/images/dispose_bgnd.gif") as gif:
        gif.save(outfile, save_all=True)
        assert_written()

        # Append images
        extra_frames = [hopper()]
        gif.copy().save(outfile, save_all=True, append_images=extra_frames)
        assert_written()

        # Test appending using a generator
        frame_generator = (frame for frame in extra_frames)
        gif.save(outfile, save_all=True, append_images=frame_generator)

    assert_written()

    # Append JPEG images
    with Image.open("Tests/images/flower.jpg") as jpeg:
        jpeg.save(outfile, save_all=True, append_images=[jpeg.copy()])

    assert_written()
2020-03-22 22:54:54 +03:00
def test_multiframe_normal_save(tmp_path):
    """A multiframe image saved without ``save_all`` still yields a PDF file."""
    outfile = str(tmp_path / "temp.pdf")

    with Image.open("Tests/images/dispose_bgnd.gif") as gif:
        gif.save(outfile)

    assert os.path.isfile(outfile)
    assert os.path.getsize(outfile) > 0
def test_pdf_open(tmp_path):
    """``PdfParser`` opens PDFs from several sources and tracks ownership.

    Checks the ``should_close_buf`` / ``should_close_file`` flags for a
    parser created empty, from a filename, from an in-memory buffer with a
    start offset, and from an already open file object.
    """
    # A buffer full of null bytes is not a PDF.
    null_buffer = bytearray(65536)
    with pytest.raises(PdfParser.PdfFormatError):
        PdfParser.PdfParser(buf=null_buffer)

    # An empty parser has no pages, no info, and nothing to close.
    with PdfParser.PdfParser() as blank:
        assert len(blank.pages) == 0
        assert len(blank.info) == 0
        assert not blank.should_close_buf
        assert not blank.should_close_file

    # Make a PDF file to read back.
    pdf_filename = helper_save_as_pdf(tmp_path, "RGB")

    # Opened by filename: both flags are set.
    with PdfParser.PdfParser(filename=pdf_filename) as from_name:
        assert len(from_name.pages) == 1
        assert from_name.should_close_buf
        assert from_name.should_close_file

    # Read from a buffer where the PDF starts at a non-zero offset.
    prefix = b"xyzzy"
    with open(pdf_filename, "rb") as f:
        content = prefix + f.read()
    with PdfParser.PdfParser(buf=content, start_offset=len(prefix)) as from_buf:
        assert len(from_buf.pages) == 1
        assert not from_buf.should_close_buf
        assert not from_buf.should_close_file

    # Read from an already open file: only the buffer flag is set.
    with open(pdf_filename, "rb") as f, PdfParser.PdfParser(f=f) as from_file:
        assert len(from_file.pages) == 1
        assert from_file.should_close_buf
        assert not from_file.should_close_file
2020-03-22 22:54:54 +03:00
def test_pdf_append_fails_on_nonexistent_file():
    """``append=True`` raises ``OSError`` when the target PDF does not exist."""
    image = hopper("RGB")
    with tempfile.TemporaryDirectory() as temp_dir, pytest.raises(OSError):
        missing_path = os.path.join(temp_dir, "nonexistent.pdf")
        image.save(missing_path, append=True)
def check_pdf_pages_consistency(pdf):
    """Assert that the page tree of *pdf* is internally consistent.

    The object at ``pdf.pages_ref`` must have a ``Kids`` entry and no
    ``Parent`` entry.  Every reference in ``pdf.pages`` must reach
    ``pdf.pages_ref`` by following ``Parent`` entries, and every entry of
    the root ``Kids`` list must be visited on one of those walks.

    The ``Kids`` list is copied before entries are ticked off, so the object
    returned by ``pdf.read_indirect`` is left unmodified.  A ``Parent``
    chain that loops back on itself fails an assertion instead of spinning
    forever.

    :param pdf: object exposing ``pages_ref``, ``pages`` and
        ``read_indirect(ref)``.
    :raises AssertionError: if any of the checks above fails.
    """
    pages_info = pdf.read_indirect(pdf.pages_ref)
    assert b"Parent" not in pages_info
    assert b"Kids" in pages_info
    # Work on a copy: removing entries from the list itself would alter the
    # object handed back by the parser.
    kids_not_used = list(pages_info[b"Kids"])
    for page_ref in pdf.pages:
        visited = []
        while True:
            # Seeing a reference twice on one walk means the chain is cyclic.
            assert page_ref not in visited
            visited.append(page_ref)
            if page_ref in kids_not_used:
                kids_not_used.remove(page_ref)
            page_info = pdf.read_indirect(page_ref)
            assert b"Parent" in page_info
            page_ref = page_info[b"Parent"]
            if page_ref == pdf.pages_ref:
                break
        assert pdf.pages_ref == page_info[b"Parent"]
    assert kids_not_used == []
def test_pdf_append(tmp_path):
    """Info edits and appended pages accumulate in an existing PDF.

    The file is created, opened in ``r+b`` mode to add document info,
    re-read to confirm the info was written, extended with two more images
    via ``append=True``, and finally re-read to confirm three pages and the
    previously written info.
    """
    # Make a PDF file.
    pdf_filename = helper_save_as_pdf(tmp_path, "RGB", producer="PdfParser")
    expected_title = os.path.splitext(os.path.basename(pdf_filename))[0]

    # Open it, check pages and info, then append some info.
    with PdfParser.PdfParser(pdf_filename, mode="r+b") as pdf:
        assert len(pdf.pages) == 1
        assert len(pdf.info) == 4
        assert pdf.info.Title == expected_title
        assert pdf.info.Producer == "PdfParser"
        assert b"CreationDate" in pdf.info
        assert b"ModDate" in pdf.info
        check_pdf_pages_consistency(pdf)

        info_updates = {
            "Title": "abc",
            "Author": "def",
            "Subject": "ghi\uABCD",
            "Keywords": "qw)e\\r(ty",
            "Creator": "hopper()",
        }
        for field, value in info_updates.items():
            setattr(pdf.info, field, value)
        pdf.start_writing()
        pdf.write_xref_and_trailer()

    # Open it again, check pages and info again.
    with PdfParser.PdfParser(pdf_filename) as pdf:
        assert len(pdf.pages) == 1
        assert len(pdf.info) == 8
        assert pdf.info.Title == "abc"
        assert b"CreationDate" in pdf.info
        assert b"ModDate" in pdf.info
        check_pdf_pages_consistency(pdf)

    # Append two images.
    hopper("CMYK").save(
        pdf_filename, append=True, save_all=True, append_images=[hopper("P")]
    )

    # Open the PDF again, check pages and info again.
    with PdfParser.PdfParser(pdf_filename) as pdf:
        assert len(pdf.pages) == 3
        assert len(pdf.info) == 8
        assert PdfParser.decode_text(pdf.info[b"Title"]) == "abc"
        assert pdf.info.Title == "abc"
        assert pdf.info.Producer == "PdfParser"
        assert pdf.info.Keywords == "qw)e\\r(ty"
        assert pdf.info.Subject == "ghi\uABCD"
        assert b"CreationDate" in pdf.info
        assert b"ModDate" in pdf.info
        check_pdf_pages_consistency(pdf)
def test_pdf_info(tmp_path):
    """Info keyword arguments passed to ``save`` are readable via ``PdfParser``."""
    created = time.strptime("2000", "%Y")
    modified = time.strptime("2001", "%Y")

    # Make a PDF file carrying all eight info fields.
    pdf_filename = helper_save_as_pdf(
        tmp_path,
        "RGB",
        title="title",
        author="author",
        subject="subject",
        keywords="keywords",
        creator="creator",
        producer="producer",
        creationDate=created,
        modDate=modified,
    )

    # Info attribute name -> value expected when the file is read back.
    expected_info = {
        "Title": "title",
        "Author": "author",
        "Subject": "subject",
        "Keywords": "keywords",
        "Creator": "creator",
        "Producer": "producer",
        "CreationDate": created,
        "ModDate": modified,
    }

    # Open it, check pages and info.
    with PdfParser.PdfParser(pdf_filename) as pdf:
        assert len(pdf.info) == 8
        for attribute, value in expected_info.items():
            assert getattr(pdf.info, attribute) == value
        check_pdf_pages_consistency(pdf)
def test_pdf_append_to_bytesio():
    """Appending to a PDF held in a ``BytesIO`` makes the data grow."""
    first_buffer = io.BytesIO()
    hopper("RGB").save(first_buffer, format="PDF")
    initial_size = len(first_buffer.getvalue())
    assert initial_size > 0

    # Start a fresh stream from the bytes written so far, then append to it.
    second_buffer = io.BytesIO(first_buffer.getvalue())
    hopper("P").save(second_buffer, format="PDF", append=True)
    assert len(second_buffer.getvalue()) > initial_size
2021-04-08 23:53:22 +03:00
@pytest.mark.timeout(1)
@pytest.mark.parametrize("newline", (b"\r", b"\n"))
def test_redos(newline):
    """A trailer followed by thousands of newlines is rejected quickly.

    The exception type is incidental; what matters is that parsing finishes
    within the timeout rather than causing a ReDoS (CVE-2021-25292).
    """
    padding = newline * 3456
    malicious = b" trailer<<>>" + padding

    with pytest.raises(PdfParser.PdfFormatError):
        PdfParser.PdfParser(buf=malicious)