Merge pull request #7382 from nopperl/parse-eps-trailer

Read bounding box information from the trailer of EPS files if specified
This commit is contained in:
Andrew Murray 2023-10-02 09:53:35 +11:00 committed by GitHub
commit 43e6826199
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 64 additions and 28 deletions

Binary file not shown.

Binary file not shown.

View File

@ -404,3 +404,18 @@ def test_timeout(test_file):
with pytest.raises(Image.UnidentifiedImageError):
with Image.open(f):
pass
def test_bounding_box_in_trailer():
    """Sizes match whether the bounding box is in the header or the trailer."""
    # The trailer variant is opened first, then the reference file whose
    # bounding box is specified in the header; both must report one size.
    with Image.open("Tests/images/zero_bb_trailer.eps") as trailer_image:
        with Image.open(FILE1) as header_image:
            assert trailer_image.size == header_image.size
def test_eof_before_bounding_box():
    """Opening this EPS sample must raise OSError."""
    eps_path = "Tests/images/zero_bb_eof_before_boundingbox.eps"
    with pytest.raises(OSError), Image.open(eps_path):
        pass

View File

@ -233,7 +233,9 @@ class EpsImageFile(ImageFile.ImageFile):
byte_arr = bytearray(255)
bytes_mv = memoryview(byte_arr)
bytes_read = 0
reading_comments = True
reading_header_comments = True
reading_trailer_comments = False
trailer_reached = False
def check_required_header_comments():
if "PS-Adobe" not in self.info:
@ -243,6 +245,36 @@ class EpsImageFile(ImageFile.ImageFile):
msg = 'EPS header missing "%%BoundingBox" comment'
raise SyntaxError(msg)
def _read_comment(s):
nonlocal reading_trailer_comments
try:
m = split.match(s)
except re.error as e:
msg = "not an EPS file"
raise SyntaxError(msg) from e
if m:
k, v = m.group(1, 2)
self.info[k] = v
if k == "BoundingBox":
if v == "(atend)":
reading_trailer_comments = True
elif not self._size or (
trailer_reached and reading_trailer_comments
):
try:
# Note: The DSC spec says that BoundingBox
# fields should be integers, but some drivers
# put floating point values there anyway.
box = [int(float(i)) for i in v.split()]
self._size = box[2] - box[0], box[3] - box[1]
self.tile = [
("eps", (0, 0) + self.size, offset, (length, box))
]
except Exception:
pass
return True
while True:
byte = self.fp.read(1)
if byte == b"":
@ -265,9 +297,9 @@ class EpsImageFile(ImageFile.ImageFile):
msg = "not an EPS file"
raise SyntaxError(msg)
else:
if reading_comments:
if reading_header_comments:
check_required_header_comments()
reading_comments = False
reading_header_comments = False
# reset bytes_read so we can keep reading
# data until the end of the line
bytes_read = 0
@ -275,7 +307,7 @@ class EpsImageFile(ImageFile.ImageFile):
bytes_read += 1
continue
if reading_comments:
if reading_header_comments:
# Load EPS header
# if this line doesn't start with a "%",
@ -283,33 +315,11 @@ class EpsImageFile(ImageFile.ImageFile):
# then we've reached the end of the header/comments
if byte_arr[0] != ord("%") or bytes_mv[:13] == b"%%EndComments":
check_required_header_comments()
reading_comments = False
reading_header_comments = False
continue
s = str(bytes_mv[:bytes_read], "latin-1")
try:
m = split.match(s)
except re.error as e:
msg = "not an EPS file"
raise SyntaxError(msg) from e
if m:
k, v = m.group(1, 2)
self.info[k] = v
if k == "BoundingBox":
try:
# Note: The DSC spec says that BoundingBox
# fields should be integers, but some drivers
# put floating point values there anyway.
box = [int(float(i)) for i in v.split()]
self._size = box[2] - box[0], box[3] - box[1]
self.tile = [
("eps", (0, 0) + self.size, offset, (length, box))
]
except Exception:
pass
else:
if not _read_comment(s):
m = field.match(s)
if m:
k = m.group(1)
@ -355,7 +365,18 @@ class EpsImageFile(ImageFile.ImageFile):
self._size = columns, rows
return
elif trailer_reached and reading_trailer_comments:
# Load EPS trailer
# if this line starts with "%%EOF",
# then we've reached the end of the file
if bytes_mv[:5] == b"%%EOF":
break
s = str(bytes_mv[:bytes_read], "latin-1")
_read_comment(s)
elif bytes_mv[:9] == b"%%Trailer":
trailer_reached = True
bytes_read = 0
check_required_header_comments()