refactor EpsImagePlugin

Merge the PSFile class into the EpsImageFile class to hopefully improve performance.
Also added a check for the required "%!PS-Adobe" and "%%BoundingBox" header comments.
This commit is contained in:
Yay295 2023-01-10 00:03:07 -06:00
parent 50f7888e3a
commit c3134dc049
2 changed files with 94 additions and 46 deletions

View File

@ -221,7 +221,7 @@ def test_read_binary_preview():
pass pass
def test_readline(tmp_path): def test_readline_psfile(tmp_path):
# check all the freaking line endings possible from the spec # check all the freaking line endings possible from the spec
# test_string = u'something\r\nelse\n\rbaz\rbif\n' # test_string = u'something\r\nelse\n\rbaz\rbif\n'
line_endings = ["\r\n", "\n", "\n\r", "\r"] line_endings = ["\r\n", "\n", "\n\r", "\r"]
@ -256,6 +256,31 @@ def test_readline(tmp_path):
_test_readline_file_psfile(s, ending) _test_readline_file_psfile(s, ending)
@pytest.mark.parametrize(
"line_ending",
(b"\r\n", b"\n", b"\n\r", b"\r"),
)
def test_readline(line_ending):
simple_file = line_ending.join(
(
b"%!PS-Adobe-3.0 EPSF-3.0",
b"%%Comment1: Some Value",
b"%%SecondComment: Another Value",
b"%%BoundingBox: 5 5 105 105",
b"10 setlinewidth",
b"10 10 moveto",
b"0 90 rlineto 90 0 rlineto 0 -90 rlineto closepath",
b"stroke",
)
)
data = io.BytesIO(simple_file)
test_file = EpsImagePlugin.EpsImageFile(data)
assert test_file.info["Comment1"] == "Some Value"
assert test_file.info["SecondComment"] == "Another Value"
assert test_file.size == (100, 100)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"filename", "filename",
( (

View File

@ -162,6 +162,7 @@ def Ghostscript(tile, size, fp, scale=1, transparency=False):
class PSFile: class PSFile:
""" """
Wrapper for bytesio object that treats either CR or LF as end of line. Wrapper for bytesio object that treats either CR or LF as end of line.
This class is no longer used internally, but kept for backwards-compatibility.
""" """
def __init__(self, fp): def __init__(self, fp):
@ -209,29 +210,69 @@ class EpsImageFile(ImageFile.ImageFile):
def _open(self): def _open(self):
(length, offset) = self._find_offset(self.fp) (length, offset) = self._find_offset(self.fp)
# Rewrap the open file pointer in something that will
# convert line endings and decode to latin-1.
fp = PSFile(self.fp)
# go to offset - start of "%!PS" # go to offset - start of "%!PS"
fp.seek(offset) self.fp.seek(offset)
box = None
self.mode = "RGB" self.mode = "RGB"
self._size = 1, 1 # FIXME: huh? self._size = None
# byte_arr = bytearray(255)
# Load EPS header bytes_mv = memoryview(byte_arr)
bytes_read = 0
reading_comments = True
s_raw = fp.readline() def check_required_header_comments():
s = s_raw.strip("\r\n") if "PS-Adobe" not in self.info:
msg = 'EPS header missing "%!PS-Adobe" comment'
raise SyntaxError(msg)
if "BoundingBox" not in self.info:
msg = 'EPS header missing "%%BoundingBox" comment'
raise SyntaxError(msg)
while s_raw: while True:
if s: byte = self.fp.read(1)
if len(s) > 255: if byte == b"":
# if we didn't read a byte we must be at the end of the file
if bytes_read == 0:
break
elif byte in b"\r\n":
# if we read a line ending character, ignore it and parse what
# we have already read. if we haven't read any other characters,
# continue reading
if bytes_read == 0:
continue
else:
# ASCII/hexadecimal lines in an EPS file must not exceed
# 255 characters, not including line ending characters
if bytes_read >= 255:
# only enforce this for lines starting with a "%",
# otherwise assume it's binary data
if byte_arr[0] == ord("%"):
msg = "not an EPS file" msg = "not an EPS file"
raise SyntaxError(msg) raise SyntaxError(msg)
else:
if reading_comments:
check_required_header_comments()
reading_comments = False
# reset bytes_read so we can keep reading
# data until the end of the line
bytes_read = 0
byte_arr[bytes_read] = byte[0]
bytes_read += 1
continue
if reading_comments:
# Load EPS header
# if this line doesn't start with a "%",
# or does start with "%%EndComments",
# then we've reached the end of the header/comments
if byte_arr[0] != ord("%") or bytes_mv[:13] == b"%%EndComments":
check_required_header_comments()
reading_comments = False
continue
s = str(bytes_mv[:bytes_read], "latin-1")
try: try:
m = split.match(s) m = split.match(s)
@ -254,16 +295,12 @@ class EpsImageFile(ImageFile.ImageFile):
] ]
except Exception: except Exception:
pass pass
else: else:
m = field.match(s) m = field.match(s)
if m: if m:
k = m.group(1) k = m.group(1)
if k == "EndComments":
break
if k[:8] == "PS-Adobe": if k[:8] == "PS-Adobe":
self.info[k[:8]] = k[9:] self.info["PS-Adobe"] = k[9:]
else: else:
self.info[k] = "" self.info[k] = ""
elif s[0] == "%": elif s[0] == "%":
@ -273,25 +310,11 @@ class EpsImageFile(ImageFile.ImageFile):
else: else:
msg = "bad EPS header" msg = "bad EPS header"
raise OSError(msg) raise OSError(msg)
elif bytes_mv[:11] == b"%ImageData:":
# Check for an "ImageData" descriptor
s_raw = fp.readline()
s = s_raw.strip("\r\n")
if s and s[:1] != "%":
break
#
# Scan for an "ImageData" descriptor
while s[:1] == "%":
if len(s) > 255:
msg = "not an EPS file"
raise SyntaxError(msg)
if s[:11] == "%ImageData:":
# Encoded bitmapped image. # Encoded bitmapped image.
x, y, bi, mo = s[11:].split(None, 7)[:4] x, y, bi, mo = byte_arr[11:].split(None, 7)[:4]
if int(bi) == 1: if int(bi) == 1:
self.mode = "1" self.mode = "1"
@ -306,16 +329,16 @@ class EpsImageFile(ImageFile.ImageFile):
self._size = int(x), int(y) self._size = int(x), int(y)
return return
s = fp.readline().strip("\r\n") bytes_read = 0
if not s:
break
if not box: check_required_header_comments()
if not self._size:
self._size = 1, 1 # errors if this isn't set. why (1,1)?
msg = "cannot determine EPS bounding box" msg = "cannot determine EPS bounding box"
raise OSError(msg) raise OSError(msg)
def _find_offset(self, fp): def _find_offset(self, fp):
s = fp.read(160) s = fp.read(160)
if s[:4] == b"%!PS": if s[:4] == b"%!PS":