Pillow/Tests/test_file_pdf.py
2024-07-06 22:06:47 +10:00

352 lines
11 KiB
Python

from __future__ import annotations
import io
import os
import os.path
import tempfile
import time
from collections.abc import Generator
from pathlib import Path
from typing import Any
import pytest
from PIL import Image, PdfParser, features
from .helper import hopper, mark_if_feature_version, skip_unless_feature
def helper_save_as_pdf(tmp_path: Path, mode: str, **kwargs: Any) -> str:
# Arrange
im = hopper(mode)
outfile = str(tmp_path / ("temp_" + mode + ".pdf"))
# Act
im.save(outfile, **kwargs)
# Assert
assert os.path.isfile(outfile)
assert os.path.getsize(outfile) > 0
with PdfParser.PdfParser(outfile) as pdf:
if kwargs.get("append_images", False) or kwargs.get("append", False):
assert len(pdf.pages) > 1
else:
assert len(pdf.pages) > 0
with open(outfile, "rb") as fp:
contents = fp.read()
size = tuple(
float(d) for d in contents.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0].split()
)
assert im.size == size
return outfile
@pytest.mark.parametrize("mode", ("L", "P", "RGB", "CMYK"))
def test_save(tmp_path: Path, mode: str) -> None:
helper_save_as_pdf(tmp_path, mode)
@skip_unless_feature("jpg_2000")
@pytest.mark.parametrize("mode", ("LA", "RGBA"))
def test_save_alpha(tmp_path: Path, mode: str) -> None:
helper_save_as_pdf(tmp_path, mode)
def test_p_alpha(tmp_path: Path) -> None:
# Arrange
outfile = str(tmp_path / "temp.pdf")
with Image.open("Tests/images/pil123p.png") as im:
assert im.mode == "P"
assert isinstance(im.info["transparency"], bytes)
# Act
im.save(outfile)
# Assert
with open(outfile, "rb") as fp:
contents = fp.read()
assert b"\n/SMask " in contents
def test_monochrome(tmp_path: Path) -> None:
# Arrange
mode = "1"
# Act / Assert
outfile = helper_save_as_pdf(tmp_path, mode)
assert os.path.getsize(outfile) < (5000 if features.check("libtiff") else 15000)
def test_unsupported_mode(tmp_path: Path) -> None:
im = hopper("PA")
outfile = str(tmp_path / "temp_PA.pdf")
with pytest.raises(ValueError):
im.save(outfile)
def test_resolution(tmp_path: Path) -> None:
im = hopper()
outfile = str(tmp_path / "temp.pdf")
im.save(outfile, resolution=150)
with open(outfile, "rb") as fp:
contents = fp.read()
size = tuple(
float(d)
for d in contents.split(b"stream\nq ")[1].split(b" 0 0 cm")[0].split(b" 0 0 ")
)
assert size == (61.44, 61.44)
size = tuple(
float(d) for d in contents.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0].split()
)
assert size == (61.44, 61.44)
@pytest.mark.parametrize(
"params",
(
{"dpi": (75, 150)},
{"dpi": (75, 150), "resolution": 200},
),
)
def test_dpi(params: dict[str, int | tuple[int, int]], tmp_path: Path) -> None:
im = hopper()
outfile = str(tmp_path / "temp.pdf")
im.save(outfile, "PDF", **params)
with open(outfile, "rb") as fp:
contents = fp.read()
size = tuple(
float(d)
for d in contents.split(b"stream\nq ")[1].split(b" 0 0 cm")[0].split(b" 0 0 ")
)
assert size == (122.88, 61.44)
size = tuple(
float(d) for d in contents.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0].split()
)
assert size == (122.88, 61.44)
@mark_if_feature_version(
pytest.mark.valgrind_known_error, "libjpeg_turbo", "2.0", reason="Known Failing"
)
def test_save_all(tmp_path: Path) -> None:
# Single frame image
helper_save_as_pdf(tmp_path, "RGB", save_all=True)
# Multiframe image
with Image.open("Tests/images/dispose_bgnd.gif") as im:
outfile = str(tmp_path / "temp.pdf")
im.save(outfile, save_all=True)
assert os.path.isfile(outfile)
assert os.path.getsize(outfile) > 0
# Append images
ims = [hopper()]
im.copy().save(outfile, save_all=True, append_images=ims)
assert os.path.isfile(outfile)
assert os.path.getsize(outfile) > 0
# Test appending using a generator
def im_generator(ims: list[Image.Image]) -> Generator[Image.Image, None, None]:
yield from ims
im.save(outfile, save_all=True, append_images=im_generator(ims))
assert os.path.isfile(outfile)
assert os.path.getsize(outfile) > 0
# Append JPEG images
with Image.open("Tests/images/flower.jpg") as jpeg:
jpeg.save(outfile, save_all=True, append_images=[jpeg.copy()])
assert os.path.isfile(outfile)
assert os.path.getsize(outfile) > 0
def test_multiframe_normal_save(tmp_path: Path) -> None:
# Test saving a multiframe image without save_all
with Image.open("Tests/images/dispose_bgnd.gif") as im:
outfile = str(tmp_path / "temp.pdf")
im.save(outfile)
assert os.path.isfile(outfile)
assert os.path.getsize(outfile) > 0
def test_pdf_open(tmp_path: Path) -> None:
# fail on a buffer full of null bytes
with pytest.raises(PdfParser.PdfFormatError):
PdfParser.PdfParser(buf=bytearray(65536))
# make an empty PDF object
with PdfParser.PdfParser() as empty_pdf:
assert len(empty_pdf.pages) == 0
assert len(empty_pdf.info) == 0
assert not empty_pdf.should_close_buf
assert not empty_pdf.should_close_file
# make a PDF file
pdf_filename = helper_save_as_pdf(tmp_path, "RGB")
# open the PDF file
with PdfParser.PdfParser(filename=pdf_filename) as hopper_pdf:
assert len(hopper_pdf.pages) == 1
assert hopper_pdf.should_close_buf
assert hopper_pdf.should_close_file
# read a PDF file from a buffer with a non-zero offset
with open(pdf_filename, "rb") as f:
content = b"xyzzy" + f.read()
with PdfParser.PdfParser(buf=content, start_offset=5) as hopper_pdf:
assert len(hopper_pdf.pages) == 1
assert not hopper_pdf.should_close_buf
assert not hopper_pdf.should_close_file
# read a PDF file from an already open file
with open(pdf_filename, "rb") as f:
with PdfParser.PdfParser(f=f) as hopper_pdf:
assert len(hopper_pdf.pages) == 1
assert hopper_pdf.should_close_buf
assert not hopper_pdf.should_close_file
def test_pdf_append_fails_on_nonexistent_file() -> None:
im = hopper("RGB")
with tempfile.TemporaryDirectory() as temp_dir:
with pytest.raises(OSError):
im.save(os.path.join(temp_dir, "nonexistent.pdf"), append=True)
def check_pdf_pages_consistency(pdf: PdfParser.PdfParser) -> None:
assert pdf.pages_ref is not None
pages_info = pdf.read_indirect(pdf.pages_ref)
assert b"Parent" not in pages_info
assert b"Kids" in pages_info
kids_not_used = pages_info[b"Kids"]
for page_ref in pdf.pages:
while True:
if page_ref in kids_not_used:
kids_not_used.remove(page_ref)
page_info = pdf.read_indirect(page_ref)
assert b"Parent" in page_info
page_ref = page_info[b"Parent"]
if page_ref == pdf.pages_ref:
break
assert pdf.pages_ref == page_info[b"Parent"]
assert kids_not_used == []
def test_pdf_append(tmp_path: Path) -> None:
# make a PDF file
pdf_filename = helper_save_as_pdf(tmp_path, "RGB", producer="PdfParser")
# open it, check pages and info
with PdfParser.PdfParser(pdf_filename, mode="r+b") as pdf:
assert len(pdf.pages) == 1
assert len(pdf.info) == 4
assert pdf.info.Title == os.path.splitext(os.path.basename(pdf_filename))[0]
assert pdf.info.Producer == "PdfParser"
assert b"CreationDate" in pdf.info
assert b"ModDate" in pdf.info
check_pdf_pages_consistency(pdf)
# append some info
pdf.info.Title = "abc"
pdf.info.Author = "def"
pdf.info.Subject = "ghi\uABCD"
pdf.info.Keywords = "qw)e\\r(ty"
pdf.info.Creator = "hopper()"
pdf.start_writing()
pdf.write_xref_and_trailer()
# open it again, check pages and info again
with PdfParser.PdfParser(pdf_filename) as pdf:
assert len(pdf.pages) == 1
assert len(pdf.info) == 8
assert pdf.info.Title == "abc"
assert b"CreationDate" in pdf.info
assert b"ModDate" in pdf.info
check_pdf_pages_consistency(pdf)
# append two images
mode_cmyk = hopper("CMYK")
mode_p = hopper("P")
mode_cmyk.save(pdf_filename, append=True, save_all=True, append_images=[mode_p])
# open the PDF again, check pages and info again
with PdfParser.PdfParser(pdf_filename) as pdf:
assert len(pdf.pages) == 3
assert len(pdf.info) == 8
assert PdfParser.decode_text(pdf.info[b"Title"]) == "abc"
assert pdf.info.Title == "abc"
assert pdf.info.Producer == "PdfParser"
assert pdf.info.Keywords == "qw)e\\r(ty"
assert pdf.info.Subject == "ghi\uABCD"
assert b"CreationDate" in pdf.info
assert b"ModDate" in pdf.info
check_pdf_pages_consistency(pdf)
def test_pdf_info(tmp_path: Path) -> None:
# make a PDF file
pdf_filename = helper_save_as_pdf(
tmp_path,
"RGB",
title="title",
author="author",
subject="subject",
keywords="keywords",
creator="creator",
producer="producer",
creationDate=time.strptime("2000", "%Y"),
modDate=time.strptime("2001", "%Y"),
)
# open it, check pages and info
with PdfParser.PdfParser(pdf_filename) as pdf:
assert len(pdf.info) == 8
assert pdf.info.Title == "title"
assert pdf.info.Author == "author"
assert pdf.info.Subject == "subject"
assert pdf.info.Keywords == "keywords"
assert pdf.info.Creator == "creator"
assert pdf.info.Producer == "producer"
assert pdf.info.CreationDate == time.strptime("2000", "%Y")
assert pdf.info.ModDate == time.strptime("2001", "%Y")
check_pdf_pages_consistency(pdf)
def test_pdf_append_to_bytesio() -> None:
im = hopper("RGB")
f = io.BytesIO()
im.save(f, format="PDF")
initial_size = len(f.getvalue())
assert initial_size > 0
im = hopper("P")
f = io.BytesIO(f.getvalue())
im.save(f, format="PDF", append=True)
assert len(f.getvalue()) > initial_size
@pytest.mark.timeout(1)
@pytest.mark.skipif("PILLOW_VALGRIND_TEST" in os.environ, reason="Valgrind is slower")
@pytest.mark.parametrize("newline", (b"\r", b"\n"))
def test_redos(newline: bytes) -> None:
malicious = b" trailer<<>>" + newline * 3456
# This particular exception isn't relevant here.
# The important thing is it doesn't timeout, cause a ReDoS (CVE-2021-25292).
with pytest.raises(PdfParser.PdfFormatError):
PdfParser.PdfParser(buf=malicious)