Merge pull request #6879 from Yay295/eps_plugin_perf

This commit is contained in:
Hugo van Kemenade 2023-04-01 08:55:48 +03:00 committed by GitHub
commit 48b0be2fb7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 260 additions and 98 deletions

View File

@ -29,7 +29,7 @@ def test_version(version, expected):
def test_unknown_version():
expected = r"Unknown removal version, update PIL\._deprecate\?"
expected = r"Unknown removal version: 12345. Update PIL\._deprecate\?"
with pytest.raises(ValueError, match=expected):
_deprecate.deprecate("Old thing", 12345, "new thing")

View File

@ -28,34 +28,65 @@ FILE2_COMPARE_SCALE2 = "Tests/images/non_zero_bb_scale2.png"
# EPS test files with binary preview
FILE3 = "Tests/images/binary_preview_map.eps"
# Binary header made of three unsigned 32-bit little-endian values.
# The section length of 0 isn't valid, but we don't read it.
simple_binary_header = (
    b"\xc5\xd0\xd3\xc6"  # 0xC6D3D0C5 magic number
    b"\x0c\x00\x00\x00"  # byte position of start of postscript section (12)
    b"\x00\x00\x00\x00"  # byte length of postscript section (0)
)

# Example program taken from page 8 of the specification:
# https://web.archive.org/web/20220120164601/https://www.adobe.com/content/dam/acom/en/devnet/actionscript/articles/5002.EPSF_Spec.pdf
# Line 0 is the version comment and line 1 is the bounding box comment.
simple_eps_file = (
    b"%!PS-Adobe-3.0 EPSF-3.0",
    b"%%BoundingBox: 5 5 105 105",
    b"10 setlinewidth",
    b"10 10 moveto",
    b"0 90 rlineto 90 0 rlineto 0 -90 rlineto closepath",
    b"stroke",
)

# Two extra header comments inserted right after the version comment.
simple_eps_file_with_comments = (
    simple_eps_file[0],
    b"%%Comment1: Some Value",
    b"%%SecondComment: Another Value",
    *simple_eps_file[1:],
)

# Variants with one of the leading header comments removed.
simple_eps_file_without_version = simple_eps_file[1:]
simple_eps_file_without_boundingbox = (simple_eps_file[0], *simple_eps_file[2:])

# Bounding box comment replaced by one whose values are not numbers.
simple_eps_file_with_invalid_boundingbox = (
    simple_eps_file[0],
    b"%%BoundingBox: a b c d",
    *simple_eps_file[2:],
)

# Same as above, with an "%ImageData:" line appended at the end.
simple_eps_file_with_invalid_boundingbox_valid_imagedata = (
    *simple_eps_file_with_invalid_boundingbox,
    b"%ImageData: 100 100 8 3",
)

# A 300-character ASCII comment inserted after the bounding box comment.
simple_eps_file_with_long_ascii_comment = (
    *simple_eps_file[:2],
    b"%%Comment: " + b"X" * 300,
    *simple_eps_file[2:],
)

# A 300-byte line of NUL bytes, wrapped in %%BeginBinary/%%EndBinary,
# inserted after the bounding box comment.
simple_eps_file_with_long_binary_data = (
    *simple_eps_file[:2],
    b"%%BeginBinary: 300",
    bytes(300),
    b"%%EndBinary",
    *simple_eps_file[2:],
)
@pytest.mark.skipif(not HAS_GHOSTSCRIPT, reason="Ghostscript not available")
def test_sanity():
# Regular scale
with Image.open(FILE1) as image1:
image1.load()
assert image1.mode == "RGB"
assert image1.size == (460, 352)
assert image1.format == "EPS"
with Image.open(FILE2) as image2:
image2.load()
assert image2.mode == "RGB"
assert image2.size == (360, 252)
assert image2.format == "EPS"
# Double scale
with Image.open(FILE1) as image1_scale2:
image1_scale2.load(scale=2)
assert image1_scale2.mode == "RGB"
assert image1_scale2.size == (920, 704)
assert image1_scale2.format == "EPS"
with Image.open(FILE2) as image2_scale2:
image2_scale2.load(scale=2)
assert image2_scale2.mode == "RGB"
assert image2_scale2.size == (720, 504)
assert image2_scale2.format == "EPS"
@pytest.mark.parametrize(
("filename", "size"), ((FILE1, (460, 352)), (FILE2, (360, 252)))
)
@pytest.mark.parametrize("scale", (1, 2))
def test_sanity(filename, size, scale):
expected_size = tuple(s * scale for s in size)
with Image.open(filename) as image:
image.load(scale=scale)
assert image.mode == "RGB"
assert image.size == expected_size
assert image.format == "EPS"
@pytest.mark.skipif(not HAS_GHOSTSCRIPT, reason="Ghostscript not available")
@ -69,11 +100,72 @@ def test_load():
def test_invalid_file():
    """Opening a non-EPS path (a .jpg) as an EPS image raises SyntaxError."""
    with pytest.raises(SyntaxError):
        EpsImagePlugin.EpsImageFile("Tests/images/flower.jpg")
def test_binary_header_only():
    """A stream holding only the binary header is rejected for lacking "%!PS-Adobe"."""
    stream = io.BytesIO(simple_binary_header)
    expected_message = 'EPS header missing "%!PS-Adobe" comment'
    with pytest.raises(SyntaxError, match=expected_message):
        EpsImagePlugin.EpsImageFile(stream)
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
def test_missing_version_comment(prefix):
    """Opening fails with SyntaxError when the version comment line is absent.

    Runs both with and without the binary header in front of the data.
    """
    body = b"\n".join(simple_eps_file_without_version)
    stream = io.BytesIO(prefix + body)
    with pytest.raises(SyntaxError):
        EpsImagePlugin.EpsImageFile(stream)
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
def test_missing_boundingbox_comment(prefix):
    """Opening fails with SyntaxError when there is no "%%BoundingBox" comment.

    Runs both with and without the binary header in front of the data.
    """
    body = b"\n".join(simple_eps_file_without_boundingbox)
    stream = io.BytesIO(prefix + body)
    expected_message = 'EPS header missing "%%BoundingBox" comment'
    with pytest.raises(SyntaxError, match=expected_message):
        EpsImagePlugin.EpsImageFile(stream)
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
def test_invalid_boundingbox_comment(prefix):
    """A "%%BoundingBox" comment with non-numeric values raises OSError.

    Runs both with and without the binary header in front of the data.
    """
    body = b"\n".join(simple_eps_file_with_invalid_boundingbox)
    stream = io.BytesIO(prefix + body)
    expected_message = "cannot determine EPS bounding box"
    with pytest.raises(OSError, match=expected_message):
        EpsImagePlugin.EpsImageFile(stream)
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
def test_invalid_boundingbox_comment_valid_imagedata_comment(prefix):
    """A valid "%ImageData:" line supplies mode and size despite a bad bounding box.

    Runs both with and without the binary header in front of the data.
    """
    lines = simple_eps_file_with_invalid_boundingbox_valid_imagedata
    stream = io.BytesIO(prefix + b"\n".join(lines))

    with Image.open(stream) as img:
        assert img.mode == "RGB"
        assert img.size == (100, 100)
        assert img.format == "EPS"
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
def test_ascii_comment_too_long(prefix):
    """An over-long ASCII comment line is rejected as "not an EPS file".

    Runs both with and without the binary header in front of the data.
    """
    body = b"\n".join(simple_eps_file_with_long_ascii_comment)
    stream = io.BytesIO(prefix + body)
    expected_message = "not an EPS file"
    with pytest.raises(SyntaxError, match=expected_message):
        EpsImagePlugin.EpsImageFile(stream)
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
def test_long_binary_data(prefix):
    """A long line of binary data does not stop the file from being opened.

    The test passes as long as constructing the image file raises nothing.
    """
    body = b"\n".join(simple_eps_file_with_long_binary_data)
    stream = io.BytesIO(prefix + body)
    EpsImagePlugin.EpsImageFile(stream)
@pytest.mark.skipif(not HAS_GHOSTSCRIPT, reason="Ghostscript not available")
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
def test_load_long_binary_data(prefix):
    """A file containing a long line of binary data can be fully loaded.

    Skipped when Ghostscript is unavailable.
    """
    body = b"\n".join(simple_eps_file_with_long_binary_data)
    stream = io.BytesIO(prefix + body)

    with Image.open(stream) as img:
        img.load()
        assert img.mode == "RGB"
        assert img.size == (100, 100)
        assert img.format == "EPS"
@mark_if_feature_version(
pytest.mark.valgrind_known_error, "libjpeg_turbo", "2.0", reason="Known Failing"
)
@ -100,7 +192,7 @@ def test_showpage():
with Image.open("Tests/images/reqd_showpage.png") as target:
# should not crash/hang
plot_image.load()
# fonts could be slightly different
# fonts could be slightly different
assert_image_similar(plot_image, target, 6)
@ -111,7 +203,7 @@ def test_transparency():
assert plot_image.mode == "RGBA"
with Image.open("Tests/images/reqd_showpage_transparency.png") as target:
# fonts could be slightly different
# fonts could be slightly different
assert_image_similar(plot_image, target, 6)
@ -206,7 +298,6 @@ def test_resize(filename):
@pytest.mark.parametrize("filename", (FILE1, FILE2))
def test_thumbnail(filename):
# Issue #619
# Arrange
with Image.open(filename) as im:
new_size = (100, 100)
im.thumbnail(new_size)
@ -220,7 +311,7 @@ def test_read_binary_preview():
pass
def test_readline(tmp_path):
def test_readline_psfile(tmp_path):
# check all the freaking line endings possible from the spec
# test_string = u'something\r\nelse\n\rbaz\rbif\n'
line_endings = ["\r\n", "\n", "\n\r", "\r"]
@ -237,7 +328,8 @@ def test_readline(tmp_path):
def _test_readline_io_psfile(test_string, ending):
f = io.BytesIO(test_string.encode("latin-1"))
t = EpsImagePlugin.PSFile(f)
with pytest.warns(DeprecationWarning):
t = EpsImagePlugin.PSFile(f)
_test_readline(t, ending)
def _test_readline_file_psfile(test_string, ending):
@ -246,7 +338,8 @@ def test_readline(tmp_path):
w.write(test_string.encode("latin-1"))
with open(f, "rb") as r:
t = EpsImagePlugin.PSFile(r)
with pytest.warns(DeprecationWarning):
t = EpsImagePlugin.PSFile(r)
_test_readline(t, ending)
for ending in line_endings:
@ -255,6 +348,25 @@ def test_readline(tmp_path):
_test_readline_file_psfile(s, ending)
def test_psfile_deprecation():
    """Constructing a PSFile emits a DeprecationWarning."""
    with pytest.warns(DeprecationWarning):
        EpsImagePlugin.PSFile(fp=None)
@pytest.mark.parametrize("prefix", (b"", simple_binary_header))
@pytest.mark.parametrize(
    "line_ending",
    (b"\r\n", b"\n", b"\n\r", b"\r"),
)
def test_readline(prefix, line_ending):
    """Header comments and size are parsed for every supported line ending.

    Runs both with and without the binary header in front of the data.
    """
    payload = prefix + line_ending.join(simple_eps_file_with_comments)
    eps = EpsImagePlugin.EpsImageFile(io.BytesIO(payload))

    info = eps.info
    assert info["Comment1"] == "Some Value"
    assert info["SecondComment"] == "Another Value"
    assert eps.size == (100, 100)
@pytest.mark.parametrize(
"filename",
(

View File

@ -207,6 +207,16 @@ Use instead::
left, top, right, bottom = draw.multiline_textbbox((0, 0), "Hello\nworld")
width, height = right - left, bottom - top
PSFile
~~~~~~
.. deprecated:: 9.5.0
The :py:class:`~PIL.EpsImagePlugin.PSFile` class has been deprecated and will
be removed in Pillow 11 (2024-10-15). It was only ever intended as an internal
helper, so there is no replacement. If you need its functionality, it is a very
short class that can easily be recreated in your own code.
Removed features
----------------

View File

@ -12,10 +12,13 @@ TODO
Deprecations
============
TODO
^^^^
PSFile
^^^^^^
TODO
The :py:class:`~PIL.EpsImagePlugin.PSFile` class has been deprecated and will
be removed in Pillow 11 (2024-10-15). It was only ever intended as an internal
helper, so there is no replacement. If you need its functionality, it is a very
short class that can easily be recreated in your own code.
API Changes
===========

View File

@ -29,10 +29,11 @@ import tempfile
from . import Image, ImageFile
from ._binary import i32le as i32
from ._deprecate import deprecate
#
# --------------------------------------------------------------------
split = re.compile(r"^%%([^:]*):[ \t]*(.*)[ \t]*$")
field = re.compile(r"^%[%!\w]([^:]*)[ \t]*$")
@ -162,9 +163,16 @@ def Ghostscript(tile, size, fp, scale=1, transparency=False):
class PSFile:
"""
Wrapper for bytesio object that treats either CR or LF as end of line.
This class is no longer used internally, but kept for backwards compatibility.
"""
def __init__(self, fp):
deprecate(
"PSFile",
11,
action="If you need the functionality of this class "
"you will need to implement it yourself.",
)
self.fp = fp
self.char = None
@ -173,13 +181,11 @@ class PSFile:
self.fp.seek(offset, whence)
def readline(self):
s = []
if self.char:
s.append(self.char)
self.char = None
s = [self.char or b""]
self.char = None
c = self.fp.read(1)
while (c not in b"\r\n") and len(c) and len(b"".join(s).strip(b"\r\n")) <= 255:
while (c not in b"\r\n") and len(c):
s.append(c)
c = self.fp.read(1)
@ -196,7 +202,7 @@ def _accept(prefix):
##
# Image plugin for Encapsulated PostScript. This plugin supports only
# Image plugin for Encapsulated PostScript. This plugin supports only
# a few variants of this format.
@ -211,29 +217,69 @@ class EpsImageFile(ImageFile.ImageFile):
def _open(self):
(length, offset) = self._find_offset(self.fp)
# Rewrap the open file pointer in something that will
# convert line endings and decode to latin-1.
fp = PSFile(self.fp)
# go to offset - start of "%!PS"
fp.seek(offset)
box = None
self.fp.seek(offset)
self.mode = "RGB"
self._size = 1, 1 # FIXME: huh?
self._size = None
#
# Load EPS header
byte_arr = bytearray(255)
bytes_mv = memoryview(byte_arr)
bytes_read = 0
reading_comments = True
s_raw = fp.readline()
s = s_raw.strip("\r\n")
def check_required_header_comments():
if "PS-Adobe" not in self.info:
msg = 'EPS header missing "%!PS-Adobe" comment'
raise SyntaxError(msg)
if "BoundingBox" not in self.info:
msg = 'EPS header missing "%%BoundingBox" comment'
raise SyntaxError(msg)
while s_raw:
if s:
if len(s) > 255:
msg = "not an EPS file"
raise SyntaxError(msg)
while True:
byte = self.fp.read(1)
if byte == b"":
# if we didn't read a byte we must be at the end of the file
if bytes_read == 0:
break
elif byte in b"\r\n":
# if we read a line ending character, ignore it and parse what
# we have already read. if we haven't read any other characters,
# continue reading
if bytes_read == 0:
continue
else:
# ASCII/hexadecimal lines in an EPS file must not exceed
# 255 characters, not including line ending characters
if bytes_read >= 255:
# only enforce this for lines starting with a "%",
# otherwise assume it's binary data
if byte_arr[0] == ord("%"):
msg = "not an EPS file"
raise SyntaxError(msg)
else:
if reading_comments:
check_required_header_comments()
reading_comments = False
# reset bytes_read so we can keep reading
# data until the end of the line
bytes_read = 0
byte_arr[bytes_read] = byte[0]
bytes_read += 1
continue
if reading_comments:
# Load EPS header
# if this line doesn't start with a "%",
# or does start with "%%EndComments",
# then we've reached the end of the header/comments
if byte_arr[0] != ord("%") or bytes_mv[:13] == b"%%EndComments":
check_required_header_comments()
reading_comments = False
continue
s = str(bytes_mv[:bytes_read], "latin-1")
try:
m = split.match(s)
@ -256,16 +302,12 @@ class EpsImageFile(ImageFile.ImageFile):
]
except Exception:
pass
else:
m = field.match(s)
if m:
k = m.group(1)
if k == "EndComments":
break
if k[:8] == "PS-Adobe":
self.info[k[:8]] = k[9:]
self.info["PS-Adobe"] = k[9:]
else:
self.info[k] = ""
elif s[0] == "%":
@ -275,43 +317,44 @@ class EpsImageFile(ImageFile.ImageFile):
else:
msg = "bad EPS header"
raise OSError(msg)
elif bytes_mv[:11] == b"%ImageData:":
# Check for an "ImageData" descriptor
# https://www.adobe.com/devnet-apps/photoshop/fileformatashtml/#50577413_pgfId-1035096
s_raw = fp.readline()
s = s_raw.strip("\r\n")
# Values:
# columns
# rows
# bit depth (1 or 8)
# mode (1: L, 2: LAB, 3: RGB, 4: CMYK)
# number of padding channels
# block size (number of bytes per row per channel)
# binary/ascii (1: binary, 2: ascii)
# data start identifier (the image data follows after a single line
# consisting only of this quoted value)
image_data_values = byte_arr[11:bytes_read].split(None, 7)
columns, rows, bit_depth, mode_id = [
int(value) for value in image_data_values[:4]
]
if s and s[:1] != "%":
break
#
# Scan for an "ImageData" descriptor
while s[:1] == "%":
if len(s) > 255:
msg = "not an EPS file"
raise SyntaxError(msg)
if s[:11] == "%ImageData:":
# Encoded bitmapped image.
x, y, bi, mo = s[11:].split(None, 7)[:4]
if int(bi) == 1:
if bit_depth == 1:
self.mode = "1"
elif int(bi) == 8:
elif bit_depth == 8:
try:
self.mode = self.mode_map[int(mo)]
self.mode = self.mode_map[mode_id]
except ValueError:
break
else:
break
self._size = int(x), int(y)
self._size = columns, rows
return
s = fp.readline().strip("\r\n")
if not s:
break
bytes_read = 0
if not box:
check_required_header_comments()
if not self._size:
self._size = 1, 1 # errors if this isn't set. why (1,1)?
msg = "cannot determine EPS bounding box"
raise OSError(msg)
@ -353,18 +396,15 @@ class EpsImageFile(ImageFile.ImageFile):
pass
#
# --------------------------------------------------------------------
def _save(im, fp, filename, eps=1):
"""EPS Writer for the Python Imaging Library."""
#
# make sure image data is available
im.load()
#
# determine PostScript image mode
if im.mode == "L":
operator = (8, 1, b"image")
@ -377,7 +417,6 @@ def _save(im, fp, filename, eps=1):
raise ValueError(msg)
if eps:
#
# write EPS header
fp.write(b"%!PS-Adobe-3.0 EPSF-3.0\n")
fp.write(b"%%Creator: PIL 0.1 EpsEncode\n")
@ -389,7 +428,6 @@ def _save(im, fp, filename, eps=1):
fp.write(b"%%ImageData: %d %d " % im.size)
fp.write(b'%d %d 0 1 1 "%s"\n' % operator)
#
# image header
fp.write(b"gsave\n")
fp.write(b"10 dict begin\n")
@ -410,7 +448,6 @@ def _save(im, fp, filename, eps=1):
fp.flush()
#
# --------------------------------------------------------------------

View File

@ -50,7 +50,7 @@ def deprecate(
elif when == 11:
removed = "Pillow 11 (2024-10-15)"
else:
msg = f"Unknown removal version, update {__name__}?"
msg = f"Unknown removal version: {when}. Update {__name__}?"
raise ValueError(msg)
if replacement and action: