refactor EpsImagePlugin

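Merge the line-reading logic of the PSFile class into the EpsImageFile class to hopefully improve performance; PSFile itself is no longer used internally but is kept for backwards compatibility. Also add a check for the required "%!PS-Adobe" and "%%BoundingBox" header comments.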
2025-11-10 04:47:43 +03:00 · 2023-01-10 00:03:07 -06:00 · 2023-01-10 00:03:07 -06:00 · c3134dc049
commit c3134dc049
parent 50f7888e3a
2 changed files with 94 additions and 46 deletions
--- a/Tests/test_file_eps.py
+++ b/Tests/test_file_eps.py
@ -221,7 +221,7 @@ def test_read_binary_preview():
        pass
-def test_readline(tmp_path):
+def test_readline_psfile(tmp_path):
    # check all the freaking line endings possible from the spec
    # test_string = u'something\r\nelse\n\rbaz\rbif\n'
    line_endings = ["\r\n", "\n", "\n\r", "\r"]
@ -256,6 +256,31 @@ def test_readline(tmp_path):
        _test_readline_file_psfile(s, ending)
@pytest.mark.parametrize(
    "line_ending",
    (b"\r\n", b"\n", b"\n\r", b"\r"),
 )
 def test_readline(line_ending):
    simple_file = line_ending.join(
        (
            b"%!PS-Adobe-3.0 EPSF-3.0",
            b"%%Comment1: Some Value",
            b"%%SecondComment: Another Value",
            b"%%BoundingBox: 5 5 105 105",
            b"10 setlinewidth",
            b"10 10 moveto",
            b"0 90 rlineto 90 0 rlineto 0 -90 rlineto closepath",
            b"stroke",
        )
    )
    data = io.BytesIO(simple_file)
    test_file = EpsImagePlugin.EpsImageFile(data)
    assert test_file.info["Comment1"] == "Some Value"
    assert test_file.info["SecondComment"] == "Another Value"
    assert test_file.size == (100, 100)
@pytest.mark.parametrize(
    "filename",
    (
--- a/src/PIL/EpsImagePlugin.py
+++ b/src/PIL/EpsImagePlugin.py
@ -162,6 +162,7 @@ def Ghostscript(tile, size, fp, scale=1, transparency=False):
 class PSFile:
    """
    Wrapper for bytesio object that treats either CR or LF as end of line.
    This class is no longer used internally, but kept for backwards-compatibility.
    """
    def __init__(self, fp):
@ -209,29 +210,69 @@ class EpsImageFile(ImageFile.ImageFile):
    def _open(self):
        (length, offset) = self._find_offset(self.fp)
        # Rewrap the open file pointer in something that will
        # convert line endings and decode to latin-1.
        fp = PSFile(self.fp)
        # go to offset - start of "%!PS"
-        fp.seek(offset)
+        self.fp.seek(offset)
        box = None
        self.mode = "RGB"
-        self._size = 1, 1  # FIXME: huh?
+        self._size = None
-        #
+        byte_arr = bytearray(255)
-        # Load EPS header
+        bytes_mv = memoryview(byte_arr)
        bytes_read = 0
        reading_comments = True
-        s_raw = fp.readline()
+        def check_required_header_comments():
-        s = s_raw.strip("\r\n")
+            if "PS-Adobe" not in self.info:
                msg = 'EPS header missing "%!PS-Adobe" comment'
                raise SyntaxError(msg)
            if "BoundingBox" not in self.info:
                msg = 'EPS header missing "%%BoundingBox" comment'
                raise SyntaxError(msg)
-        while s_raw:
+        while True:
-            if s:
+            byte = self.fp.read(1)
-                if len(s) > 255:
+            if byte == b"":
                # if we didn't read a byte we must be at the end of the file
                if bytes_read == 0:
                    break
            elif byte in b"\r\n":
                # if we read a line ending character, ignore it and parse what
                # we have already read. if we haven't read any other characters,
                # continue reading
                if bytes_read == 0:
                    continue
            else:
                # ASCII/hexadecimal lines in an EPS file must not exceed
                # 255 characters, not including line ending characters
                if bytes_read >= 255:
                    # only enforce this for lines starting with a "%",
                    # otherwise assume it's binary data
                    if byte_arr[0] == ord("%"):
                        msg = "not an EPS file"
                        raise SyntaxError(msg)
                    else:
                        if reading_comments:
                            check_required_header_comments()
                            reading_comments = False
                        # reset bytes_read so we can keep reading
                        # data until the end of the line
                        bytes_read = 0
                byte_arr[bytes_read] = byte[0]
                bytes_read += 1
                continue
            if reading_comments:
                # Load EPS header
                # if this line doesn't start with a "%",
                # or does start with "%%EndComments",
                # then we've reached the end of the header/comments
                if byte_arr[0] != ord("%") or bytes_mv[:13] == b"%%EndComments":
                    check_required_header_comments()
                    reading_comments = False
                    continue
                s = str(bytes_mv[:bytes_read], "latin-1")
                try:
                    m = split.match(s)
@ -254,16 +295,12 @@ class EpsImageFile(ImageFile.ImageFile):
                            ]
                        except Exception:
                            pass
                else:
                    m = field.match(s)
                    if m:
                        k = m.group(1)
                        if k == "EndComments":
                            break
                        if k[:8] == "PS-Adobe":
-                            self.info[k[:8]] = k[9:]
+                            self.info["PS-Adobe"] = k[9:]
                        else:
                            self.info[k] = ""
                    elif s[0] == "%":
@ -273,25 +310,11 @@ class EpsImageFile(ImageFile.ImageFile):
                    else:
                        msg = "bad EPS header"
                        raise OSError(msg)
            elif bytes_mv[:11] == b"%ImageData:":
                # Check for an "ImageData" descriptor
            s_raw = fp.readline()
            s = s_raw.strip("\r\n")
            if s and s[:1] != "%":
                break
        #
        # Scan for an "ImageData" descriptor
        while s[:1] == "%":
            if len(s) > 255:
                msg = "not an EPS file"
                raise SyntaxError(msg)
            if s[:11] == "%ImageData:":
                # Encoded bitmapped image.
-                x, y, bi, mo = s[11:].split(None, 7)[:4]
+                x, y, bi, mo = byte_arr[11:].split(None, 7)[:4]
                if int(bi) == 1:
                    self.mode = "1"
@ -306,16 +329,16 @@ class EpsImageFile(ImageFile.ImageFile):
                self._size = int(x), int(y)
                return
-            s = fp.readline().strip("\r\n")
+            bytes_read = 0
            if not s:
                break
-        if not box:
+        check_required_header_comments()
        if not self._size:
            self._size = 1, 1  # errors if this isn't set. why (1,1)?
            msg = "cannot determine EPS bounding box"
            raise OSError(msg)
    def _find_offset(self, fp):
        s = fp.read(160)
        if s[:4] == b"%!PS":