2023-12-21 14:13:31 +03:00
|
|
|
from __future__ import annotations
|
2024-01-20 14:23:03 +03:00
|
|
|
|
2018-01-25 02:44:59 +03:00
|
|
|
import io
|
2018-01-24 04:28:39 +03:00
|
|
|
import os
|
2014-06-10 13:10:47 +04:00
|
|
|
import os.path
|
2018-01-24 04:28:39 +03:00
|
|
|
import tempfile
|
2018-07-29 15:33:59 +03:00
|
|
|
import time
|
2024-07-03 09:44:45 +03:00
|
|
|
from collections.abc import Generator
|
2024-01-31 12:12:58 +03:00
|
|
|
from pathlib import Path
|
2024-07-03 09:44:45 +03:00
|
|
|
from typing import Any
|
2014-05-11 10:01:09 +04:00
|
|
|
|
2020-02-22 16:06:21 +03:00
|
|
|
import pytest
|
2020-08-07 13:28:33 +03:00
|
|
|
|
2022-08-22 12:57:33 +03:00
|
|
|
from PIL import Image, PdfParser, features
|
2019-07-06 23:40:53 +03:00
|
|
|
|
2023-02-02 13:48:47 +03:00
|
|
|
from .helper import hopper, mark_if_feature_version, skip_unless_feature
|
2019-07-06 23:40:53 +03:00
|
|
|
|
2014-05-11 10:01:09 +04:00
|
|
|
|
2024-02-17 07:00:38 +03:00
|
|
|
def helper_save_as_pdf(tmp_path: Path, mode: str, **kwargs: Any) -> str:
    """Save ``hopper(mode)`` as a PDF under ``tmp_path`` and sanity-check the file.

    Any extra keyword arguments are forwarded unchanged to ``save``.
    The path of the written file is returned as a string.
    """
    # Arrange
    source = hopper(mode)
    pdf_path = tmp_path / f"temp_{mode}.pdf"
    outfile = str(pdf_path)

    # Act
    source.save(outfile, **kwargs)

    # Assert
    assert os.path.isfile(outfile)
    assert os.path.getsize(outfile) > 0

    # When appending, more than one page is required; otherwise at least one
    multipage = kwargs.get("append_images", False) or kwargs.get("append", False)
    with PdfParser.PdfParser(outfile) as pdf:
        assert len(pdf.pages) > (1 if multipage else 0)

    # The numbers following "/MediaBox [ 0 0 " must match the image size
    raw = pdf_path.read_bytes()
    media_box = raw.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0]
    assert source.size == tuple(map(float, media_box.split()))

    return outfile
|
2014-05-11 18:35:49 +04:00
|
|
|
|
2018-01-24 04:28:39 +03:00
|
|
|
|
2022-10-03 08:57:42 +03:00
|
|
|
@pytest.mark.parametrize("mode", ("L", "P", "RGB", "CMYK"))
def test_save(tmp_path: Path, mode: str) -> None:
    """Run the shared save-and-check helper once per parametrized mode."""
    helper_save_as_pdf(tmp_path=tmp_path, mode=mode)
|
|
|
|
|
|
|
|
|
2023-02-02 13:48:47 +03:00
|
|
|
@skip_unless_feature("jpg_2000")
@pytest.mark.parametrize("mode", ("LA", "RGBA"))
def test_save_alpha(tmp_path: Path, mode: str) -> None:
    """Run the shared save-and-check helper for the alpha modes."""
    helper_save_as_pdf(tmp_path=tmp_path, mode=mode)
|
2023-02-02 13:48:47 +03:00
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_p_alpha(tmp_path: Path) -> None:
    """Save a "P" image with bytes transparency and look for an SMask entry."""
    # Arrange
    pdf_path = tmp_path / "temp.pdf"
    with Image.open("Tests/images/pil123p.png") as source:
        assert source.mode == "P"
        assert isinstance(source.info["transparency"], bytes)

        # Act
        source.save(str(pdf_path))

    # Assert
    assert b"\n/SMask " in pdf_path.read_bytes()
|
2023-08-07 06:53:19 +03:00
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_monochrome(tmp_path: Path) -> None:
    """Save a mode "1" image and check the output stays below a size limit."""
    # Act
    written = helper_save_as_pdf(tmp_path, "1")

    # Assert: the limit is lower when the libtiff feature is available
    size_limit = 5000 if features.check("libtiff") else 15000
    assert os.path.getsize(written) < size_limit
|
2014-05-11 18:16:13 +04:00
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_unsupported_mode(tmp_path: Path) -> None:
    """Saving a "PA" image as PDF must raise ``ValueError``."""
    pa_image = hopper("PA")
    target = str(tmp_path / "temp_PA.pdf")

    with pytest.raises(ValueError):
        pa_image.save(target)
|
2017-11-04 02:46:15 +03:00
|
|
|
|
2021-01-07 16:57:49 +03:00
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_resolution(tmp_path: Path) -> None:
    """Save with ``resolution=150`` and check both sizes written to the PDF."""
    expected = (61.44, 61.44)

    pdf_path = tmp_path / "temp.pdf"
    hopper().save(str(pdf_path), resolution=150)
    raw = pdf_path.read_bytes()

    # Operands between "stream\nq " and " 0 0 cm", separated by " 0 0 "
    cm_operands = raw.split(b"stream\nq ")[1].split(b" 0 0 cm")[0]
    assert tuple(map(float, cm_operands.split(b" 0 0 "))) == expected

    # Numbers following "/MediaBox [ 0 0 " up to the closing bracket
    media_box = raw.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0]
    assert tuple(map(float, media_box.split())) == expected
|
|
|
|
|
|
|
|
|
2023-02-22 10:59:51 +03:00
|
|
|
@pytest.mark.parametrize(
    "params",
    (
        {"dpi": (75, 150)},
        {"dpi": (75, 150), "resolution": 200},
    ),
)
def test_dpi(params: dict[str, int | tuple[int, int]], tmp_path: Path) -> None:
    """Save with each parameter set and check both sizes written to the PDF."""
    expected = (122.88, 61.44)

    pdf_path = tmp_path / "temp.pdf"
    hopper().save(str(pdf_path), "PDF", **params)
    raw = pdf_path.read_bytes()

    # Operands between "stream\nq " and " 0 0 cm", separated by " 0 0 "
    cm_operands = raw.split(b"stream\nq ")[1].split(b" 0 0 cm")[0]
    assert tuple(map(float, cm_operands.split(b" 0 0 "))) == expected

    # Numbers following "/MediaBox [ 0 0 " up to the closing bracket
    media_box = raw.split(b"/MediaBox [ 0 0 ")[1].split(b"]")[0]
    assert tuple(map(float, media_box.split())) == expected
|
|
|
|
|
|
|
|
|
2021-04-10 17:58:01 +03:00
|
|
|
@mark_if_feature_version(
    pytest.mark.valgrind_known_error, "libjpeg_turbo", "2.0", reason="Known Failing"
)
def test_save_all(tmp_path: Path) -> None:
    """Exercise ``save_all`` with single-frame, multiframe and appended images."""

    def assert_written(path: str) -> None:
        # The file must exist and must not be empty
        assert os.path.isfile(path)
        assert os.path.getsize(path) > 0

    # Single frame image
    helper_save_as_pdf(tmp_path, "RGB", save_all=True)

    outfile = str(tmp_path / "temp.pdf")

    # Multiframe image
    with Image.open("Tests/images/dispose_bgnd.gif") as gif:
        gif.save(outfile, save_all=True)
        assert_written(outfile)

        # Append images
        extra = [hopper()]
        gif.copy().save(outfile, save_all=True, append_images=extra)
        assert_written(outfile)

        # Test appending using a generator
        def im_generator(ims: list[Image.Image]) -> Generator[Image.Image, None, None]:
            yield from ims

        gif.save(outfile, save_all=True, append_images=im_generator(extra))

    assert_written(outfile)

    # Append JPEG images
    with Image.open("Tests/images/flower.jpg") as jpeg:
        jpeg.save(outfile, save_all=True, append_images=[jpeg.copy()])

    assert_written(outfile)
|
2018-01-31 02:25:04 +03:00
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_multiframe_normal_save(tmp_path: Path) -> None:
    """Save a multiframe image without ``save_all`` and check a file is written."""
    target = str(tmp_path / "temp.pdf")
    with Image.open("Tests/images/dispose_bgnd.gif") as gif:
        gif.save(target)

    assert os.path.isfile(target)
    assert os.path.getsize(target) > 0
|
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_pdf_open(tmp_path: Path) -> None:
    """Open ``PdfParser`` from nothing, a filename, a buffer and an open file."""
    # A buffer holding only null bytes is rejected
    null_buffer = bytearray(65536)
    with pytest.raises(PdfParser.PdfFormatError):
        PdfParser.PdfParser(buf=null_buffer)

    # A parser created without arguments is empty
    with PdfParser.PdfParser() as blank:
        assert len(blank.pages) == 0
        assert len(blank.info) == 0
        assert not blank.should_close_buf
        assert not blank.should_close_file

    # Write a PDF file to read back
    pdf_filename = helper_save_as_pdf(tmp_path, "RGB")

    # Open it by filename
    with PdfParser.PdfParser(filename=pdf_filename) as from_name:
        assert len(from_name.pages) == 1
        assert from_name.should_close_buf
        assert from_name.should_close_file

    # Read it from a buffer, starting after a prefix of junk bytes
    prefix = b"xyzzy"
    padded = prefix + Path(pdf_filename).read_bytes()
    with PdfParser.PdfParser(buf=padded, start_offset=len(prefix)) as from_buf:
        assert len(from_buf.pages) == 1
        assert not from_buf.should_close_buf
        assert not from_buf.should_close_file

    # Read it from a file object that is already open
    with open(pdf_filename, "rb") as handle, PdfParser.PdfParser(f=handle) as from_f:
        assert len(from_f.pages) == 1
        assert from_f.should_close_buf
        assert not from_f.should_close_file
|
2018-01-31 02:25:04 +03:00
|
|
|
|
2020-03-22 22:54:54 +03:00
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_pdf_append_fails_on_nonexistent_file() -> None:
    """Appending to a PDF path that does not exist must raise ``OSError``."""
    rgb_image = hopper("RGB")
    with tempfile.TemporaryDirectory() as temp_dir, pytest.raises(OSError):
        rgb_image.save(os.path.join(temp_dir, "nonexistent.pdf"), append=True)
|
|
|
|
|
|
|
|
|
2024-02-17 07:00:38 +03:00
|
|
|
def check_pdf_pages_consistency(pdf: PdfParser.PdfParser) -> None:
    """Assert that the page tree of ``pdf`` is consistent.

    The object behind ``pdf.pages_ref`` must have ``Kids`` and no ``Parent``.
    Starting from each entry of ``pdf.pages``, following ``Parent`` references
    must arrive at ``pdf.pages_ref``, and every ``Kids`` entry must be met
    along the way.
    """
    assert pdf.pages_ref is not None
    root = pdf.read_indirect(pdf.pages_ref)
    assert b"Parent" not in root
    assert b"Kids" in root

    # Entries are removed from this list, in place, as they are encountered
    unvisited_kids = root[b"Kids"]
    for leaf_ref in pdf.pages:
        current_ref = leaf_ref
        # The body runs at least once, then climbs until the root is reached
        while True:
            if current_ref in unvisited_kids:
                unvisited_kids.remove(current_ref)
            node = pdf.read_indirect(current_ref)
            assert b"Parent" in node
            current_ref = node[b"Parent"]
            if current_ref == pdf.pages_ref:
                break
        assert pdf.pages_ref == node[b"Parent"]
    assert unvisited_kids == []
|
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_pdf_append(tmp_path: Path) -> None:
    """Append info entries, then images, to an existing PDF and re-check it."""
    # Write the initial PDF file
    pdf_filename = helper_save_as_pdf(tmp_path, "RGB", producer="PdfParser")
    default_title = Path(pdf_filename).stem

    # First pass: check pages and info, then append more info
    with PdfParser.PdfParser(pdf_filename, mode="r+b") as pdf:
        assert len(pdf.pages) == 1
        assert len(pdf.info) == 4
        assert pdf.info.Title == default_title
        assert pdf.info.Producer == "PdfParser"
        assert b"CreationDate" in pdf.info
        assert b"ModDate" in pdf.info
        check_pdf_pages_consistency(pdf)

        new_info = {
            "Title": "abc",
            "Author": "def",
            "Subject": "ghi\uABCD",
            "Keywords": "qw)e\\r(ty",
            "Creator": "hopper()",
        }
        for key, value in new_info.items():
            setattr(pdf.info, key, value)
        pdf.start_writing()
        pdf.write_xref_and_trailer()

    # Second pass: the appended info is present and the page count is unchanged
    with PdfParser.PdfParser(pdf_filename) as pdf:
        assert len(pdf.pages) == 1
        assert len(pdf.info) == 8
        assert pdf.info.Title == "abc"
        assert b"CreationDate" in pdf.info
        assert b"ModDate" in pdf.info
        check_pdf_pages_consistency(pdf)

    # Append two images
    cmyk_image = hopper("CMYK")
    palette_image = hopper("P")
    cmyk_image.save(
        pdf_filename, append=True, save_all=True, append_images=[palette_image]
    )

    # Third pass: three pages now, info unchanged
    with PdfParser.PdfParser(pdf_filename) as pdf:
        assert len(pdf.pages) == 3
        assert len(pdf.info) == 8
        assert PdfParser.decode_text(pdf.info[b"Title"]) == "abc"
        assert pdf.info.Title == "abc"
        assert pdf.info.Producer == "PdfParser"
        assert pdf.info.Keywords == "qw)e\\r(ty"
        assert pdf.info.Subject == "ghi\uABCD"
        assert b"CreationDate" in pdf.info
        assert b"ModDate" in pdf.info
        check_pdf_pages_consistency(pdf)
|
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_pdf_info(tmp_path: Path) -> None:
    """Pass info fields as save arguments and read them back from the PDF."""
    created = time.strptime("2000", "%Y")
    modified = time.strptime("2001", "%Y")

    # Write a PDF file with every info field supplied
    save_options = {
        "title": "title",
        "author": "author",
        "subject": "subject",
        "keywords": "keywords",
        "creator": "creator",
        "producer": "producer",
        "creationDate": created,
        "modDate": modified,
    }
    pdf_filename = helper_save_as_pdf(tmp_path, "RGB", **save_options)

    # Open it and compare each info entry
    with PdfParser.PdfParser(pdf_filename) as pdf:
        info = pdf.info
        assert len(info) == 8
        assert info.Title == "title"
        assert info.Author == "author"
        assert info.Subject == "subject"
        assert info.Keywords == "keywords"
        assert info.Creator == "creator"
        assert info.Producer == "producer"
        assert info.CreationDate == created
        assert info.ModDate == modified
        check_pdf_pages_consistency(pdf)
|
|
|
|
|
|
|
|
|
2024-01-31 12:12:58 +03:00
|
|
|
def test_pdf_append_to_bytesio() -> None:
    """Appending to a PDF held in a ``BytesIO`` must grow the stored data."""
    first_buffer = io.BytesIO()
    hopper("RGB").save(first_buffer, format="PDF")
    first_pdf = first_buffer.getvalue()
    initial_size = len(first_pdf)
    assert initial_size > 0

    # Start a new buffer from the first PDF's bytes and append to it
    second_buffer = io.BytesIO(first_pdf)
    hopper("P").save(second_buffer, format="PDF", append=True)
    assert len(second_buffer.getvalue()) > initial_size
|
2021-04-08 23:53:22 +03:00
|
|
|
|
|
|
|
|
|
|
|
@pytest.mark.timeout(1)
@pytest.mark.skipif("PILLOW_VALGRIND_TEST" in os.environ, reason="Valgrind is slower")
@pytest.mark.parametrize("newline", (b"\r", b"\n"))
def test_redos(newline: bytes) -> None:
    """Parse a trailer followed by many newlines within the time limit."""
    payload = b" trailer<<>>" + newline * 3456

    # Which exception is raised does not matter for this test.
    # What matters is that parsing finishes before the timeout instead of
    # triggering a ReDoS (CVE-2021-25292).
    with pytest.raises(PdfParser.PdfFormatError):
        PdfParser.PdfParser(buf=payload)
|