Merge pull request #5242 from Piolie/plainPPM

Add support for decoding plain PPM formats
2026-02-13 02:30:28 +03:00 · 2022-06-14 22:24:25 +10:00 · 2022-06-14 22:24:25 +10:00 · aad3af437e
commit aad3af437e
parent c083eadccb 1bac1cf6f5
10 changed files with 300 additions and 28 deletions
--- a/Tests/images/hopper_16bit.pgm
+++ b/Tests/images/hopper_16bit.pgm
--- a/Tests/images/hopper_16bit_plain.pgm
+++ b/Tests/images/hopper_16bit_plain.pgm
--- a/Tests/images/hopper_1bit.pbm
+++ b/Tests/images/hopper_1bit.pbm
--- a/Tests/images/hopper_1bit_plain.pbm
+++ b/Tests/images/hopper_1bit_plain.pbm
--- a/Tests/images/hopper_8bit.pgm
+++ b/Tests/images/hopper_8bit.pgm
--- a/Tests/images/hopper_8bit.ppm
+++ b/Tests/images/hopper_8bit.ppm
--- a/Tests/images/hopper_8bit_plain.pgm
+++ b/Tests/images/hopper_8bit_plain.pgm
--- a/Tests/images/hopper_8bit_plain.ppm
+++ b/Tests/images/hopper_8bit_plain.ppm
--- a/Tests/test_file_ppm.py
+++ b/Tests/test_file_ppm.py
@ -3,7 +3,7 @@ from io import BytesIO

 import pytest

-from PIL import Image, UnidentifiedImageError
+from PIL import Image, PpmImagePlugin

 from .helper import assert_image_equal_tofile, assert_image_similar, hopper

@ -22,6 +22,21 @@ def test_sanity():
@pytest.mark.parametrize(
    "data, mode, pixels",
    (
+        (b"P2 3 1 4 0 2 4", "L", (0, 128, 255)),
+        (b"P2 3 1 257 0 128 257", "I", (0, 32640, 65535)),
+        # P3 with maxval < 255
+        (
+            b"P3 3 1 17 0 1 2 8 9 10 15 16 17",
+            "RGB",
+            ((0, 15, 30), (120, 135, 150), (225, 240, 255)),
+        ),
+        # P3 with maxval > 255
+        # Scale down to 255, since there is no RGB mode with more than 8-bit
+        (
+            b"P3 3 1 257 0 1 2 128 129 130 256 257 257",
+            "RGB",
+            ((0, 1, 2), (127, 128, 129), (254, 255, 255)),
+        ),
        (b"P5 3 1 4 \x00\x02\x04", "L", (0, 128, 255)),
        (b"P5 3 1 257 \x00\x00\x00\x80\x01\x01", "I", (0, 32640, 65535)),
        # P6 with maxval < 255
@ -35,7 +50,6 @@ def test_sanity():
            ),
        ),
        # P6 with maxval > 255
-        # Scale down to 255, since there is no RGB mode with more than 8-bit
        (
            b"P6 3 1 257 \x00\x00\x00\x01\x00\x02"
            b"\x00\x80\x00\x81\x00\x82\x01\x00\x01\x01\xFF\xFF",
@ -85,14 +99,111 @@ def test_pnm(tmp_path):
        assert_image_equal_tofile(im, f)


-def test_magic(tmp_path):
+@pytest.mark.parametrize(
+    "plain_path, raw_path",
+    (
+        # P1 (plain) against P4 (raw)
+        ("Tests/images/hopper_1bit_plain.pbm", "Tests/images/hopper_1bit.pbm"),
+        # P2 (plain) against P5 (raw)
+        ("Tests/images/hopper_8bit_plain.pgm", "Tests/images/hopper_8bit.pgm"),
+        # P3 (plain) against P6 (raw)
+        ("Tests/images/hopper_8bit_plain.ppm", "Tests/images/hopper_8bit.ppm"),
+    ),
+)
+def test_plain(plain_path, raw_path):
+    """A plain file must decode to the same image as its raw counterpart."""
+    with Image.open(plain_path) as plain_im:
+        assert_image_equal_tofile(plain_im, raw_path)
+
+
+def test_16bit_plain_pgm():
+    """Check mode, size and MIME type of a 16-bit plain PGM and compare to raw."""
+    plain_path = "Tests/images/hopper_16bit_plain.pgm"  # P2 with maxval 2 ** 16 - 1
+    raw_path = "Tests/images/hopper_16bit.pgm"  # P5 with maxval 2 ** 16 - 1
+
+    with Image.open(plain_path) as plain_im:
+        assert plain_im.mode == "I"
+        assert plain_im.size == (128, 128)
+        assert plain_im.get_format_mimetype() == "image/x-portable-graymap"
+
+        assert_image_equal_tofile(plain_im, raw_path)
+
+
+@pytest.mark.parametrize(
+    "header, data, comment_count",
+    (
+        (b"P1\n2 2", b"1010", 10**6),
+        (b"P2\n3 1\n4", b"0 2 4", 1),
+        (b"P3\n2 2\n255", b"0 0 0 001 1 1 2 2 2 255 255 255", 10**6),
+    ),
+)
+def test_plain_data_with_comment(tmp_path, header, data, comment_count):
+    """
+    Comments before and after the pixel data must not change the decoded image.
+
+    The large comment counts presumably make a single comment span several read
+    blocks of the decoder.
+    """
+    comment = b"# comment" * comment_count
+    without_comment = tmp_path / "temp1.ppm"
+    with_comment = tmp_path / "temp2.ppm"
+    without_comment.write_bytes(header + b"\n\n" + data)
+    with_comment.write_bytes(header + b"\n" + comment + b"\n" + data + comment)
+
+    with Image.open(str(without_comment)) as im:
+        assert_image_equal_tofile(im, str(with_comment))
+
+
+@pytest.mark.parametrize("data", (b"P1\n128 128\n", b"P3\n128 128\n255\n"))
+def test_plain_truncated_data(tmp_path, data):
+    # A complete plain header with no pixel data after it: opening the file
+    # succeeds, but loading the pixels must raise ValueError.
    path = str(tmp_path / "temp.ppm")
    with open(path, "wb") as f:
-        f.write(b"PyInvalid")
+        f.write(data)

-    with pytest.raises(UnidentifiedImageError):
-        with Image.open(path):
-            pass
+    with Image.open(path) as im:
+        with pytest.raises(ValueError):
+            im.load()
+
+
+@pytest.mark.parametrize("data", (b"P1\n128 128\n1009", b"P3\n128 128\n255\n100A"))
+def test_plain_invalid_data(tmp_path, data):
+    """Pixel data containing a character that is not valid must fail on load."""
+    target = tmp_path / "temp.ppm"
+    target.write_bytes(data)
+
+    with Image.open(str(target)) as im, pytest.raises(ValueError):
+        im.load()
+
+
+@pytest.mark.parametrize(
+    "data",
+    (
+        b"P3\n128 128\n255\n012345678910",  # half token too long
+        b"P3\n128 128\n255\n012345678910 0",  # token too long
+    ),
+)
+def test_plain_ppm_token_too_long(tmp_path, data):
+    """An over-long data token, complete or cut off at EOF, must fail on load."""
+    target = tmp_path / "temp.ppm"
+    target.write_bytes(data)
+
+    with Image.open(str(target)) as im, pytest.raises(ValueError):
+        im.load()
+
+
+def test_plain_ppm_value_too_large(tmp_path):
+    """A channel value above the declared maxval (256 > 255) must fail on load."""
+    target = tmp_path / "temp.ppm"
+    target.write_bytes(b"P3\n128 128\n255\n256")
+
+    with Image.open(str(target)) as im, pytest.raises(ValueError):
+        im.load()
+
+
+def test_magic():
+    """Constructing the plugin directly on an unknown magic number must fail."""
+    bogus_stream = BytesIO(b"PyInvalid")
+    with pytest.raises(SyntaxError):
+        PpmImagePlugin.PpmImageFile(fp=bogus_stream)


 def test_header_with_comments(tmp_path):
@ -114,7 +225,7 @@ def test_non_integer_token(tmp_path):
            pass


-def test_token_too_long(tmp_path):
+def test_header_token_too_long(tmp_path):
    path = str(tmp_path / "temp.ppm")
    with open(path, "wb") as f:
        f.write(b"P6\n 01234567890")
--- a/src/PIL/PpmImagePlugin.py
+++ b/src/PIL/PpmImagePlugin.py
@ -27,6 +27,9 @@ b_whitespace = b"\x20\x09\x0a\x0b\x0c\x0d"

 MODES = {
    # standard
+    b"P1": "1",
+    b"P2": "L",
+    b"P3": "RGB",
    b"P4": "1",
    b"P5": "L",
    b"P6": "RGB",
@ -40,7 +43,7 @@ MODES = {


 def _accept(prefix):
+    # Accept a prefix whose first byte is "P" and whose second byte is one of the
+    # listed type characters; the list now also contains 1, 2 and 3 (plain formats).
+    # NOTE(review): prefix[1] raises IndexError for a one-byte prefix starting with
+    # "P" -- confirm the caller tolerates that.
-    return prefix[0:1] == b"P" and prefix[1] in b"0456y"
+    return prefix[0:1] == b"P" and prefix[1] in b"0123456y"


 ##
@ -93,19 +96,17 @@ class PpmImageFile(ImageFile.ImageFile):
        except KeyError:
            raise SyntaxError("not a PPM file")

-        self.custom_mimetype = {
-            b"P4": "image/x-portable-bitmap",
-            b"P5": "image/x-portable-graymap",
-            b"P6": "image/x-portable-pixmap",
-        }.get(magic_number)
-
-        if mode == "1":
-            self.mode = "1"
-            rawmode = "1;I"
-        else:
-            self.mode = rawmode = mode
+        if magic_number in (b"P1", b"P4"):
+            self.custom_mimetype = "image/x-portable-bitmap"
+        elif magic_number in (b"P2", b"P5"):
+            self.custom_mimetype = "image/x-portable-graymap"
+        elif magic_number in (b"P3", b"P6"):
+            self.custom_mimetype = "image/x-portable-pixmap"

+        maxval = None
        decoder_name = "raw"
+        if magic_number in (b"P1", b"P2", b"P3"):
+            decoder_name = "ppm_plain"
        for ix in range(3):
            token = int(self._read_token())
            if ix == 0:  # token is the x size
@ -113,7 +114,11 @@ class PpmImageFile(ImageFile.ImageFile):
            elif ix == 1:  # token is the y size
                ysize = token
                if mode == "1":
+                    self.mode = "1"
+                    rawmode = "1;I"
                    break
+                else:
+                    self.mode = rawmode = mode
            elif ix == 2:  # token is maxval
                maxval = token
                if not 0 < maxval < 65536:
@ -123,23 +128,156 @@ class PpmImageFile(ImageFile.ImageFile):
                if maxval > 255 and mode == "L":
                    self.mode = "I"

-                # If maxval matches a bit depth, use the raw decoder directly
-                if maxval == 65535 and mode == "L":
-                    rawmode = "I;16B"
-                elif maxval != 255:
-                    decoder_name = "ppm"
-        args = (rawmode, 0, 1) if decoder_name == "raw" else (rawmode, maxval)
+                if decoder_name != "ppm_plain":
+                    # If maxval matches a bit depth, use the raw decoder directly
+                    if maxval == 65535 and mode == "L":
+                        rawmode = "I;16B"
+                    elif maxval != 255:
+                        decoder_name = "ppm"

+        args = (rawmode, 0, 1) if decoder_name == "raw" else (rawmode, maxval)
        self._size = xsize, ysize
        self.tile = [(decoder_name, (0, 0, xsize, ysize), self.fp.tell(), args)]


+#
+# --------------------------------------------------------------------
+
+
+class PpmPlainDecoder(ImageFile.PyDecoder):
+    """
+    Decoder for the plain (text) PBM, PGM and PPM formats (P1, P2 and P3).
+
+    Pixel data is pulled from ``self.fd`` in blocks of ``ImageFile.SAFEBLOCK``
+    bytes. ``#`` comments are stripped, the remaining tokens are converted to raw
+    pixel bytes and the result is passed to ``set_as_raw``.
+    """
+
+    _pulls_fd = True
+
+    def _read_block(self):
+        """Read up to ``ImageFile.SAFEBLOCK`` bytes from the file; empty at EOF."""
+        return self.fd.read(ImageFile.SAFEBLOCK)
+
+    def _find_comment_end(self, block, start=0):
+        """
+        Find the first line ending (LF or CR) in ``block`` at or after ``start``.
+
+        :returns: The lowest index of either character, or -1 if neither occurs.
+        """
+        a = block.find(b"\n", start)
+        b = block.find(b"\r", start)
+        if a == -1 or b == -1:
+            # At most one of the two was found: max() yields it, or -1 for neither
+            return max(a, b)
+        # Both found (index 0 is a valid position): the earlier one ends the comment
+        return min(a, b)
+
+    def _ignore_comments(self, block):
+        """
+        Return ``block`` with all ``#`` comments removed.
+
+        ``self._comment_spans`` records whether a comment was still open at the end
+        of the previous block. If so, further blocks are read until that comment
+        ends (or the file does) before the rest is searched for new comments.
+        """
+        if self._comment_spans:
+            # Finish current comment
+            while block:
+                comment_end = self._find_comment_end(block)
+                if comment_end != -1:
+                    # Comment ends in this block
+                    # Delete tail of comment
+                    block = block[comment_end + 1 :]
+                    break
+                else:
+                    # Comment spans whole block
+                    # So read the next block, looking for the end
+                    block = self._read_block()
+
+        # Search for any further comments
+        self._comment_spans = False
+        while True:
+            comment_start = block.find(b"#")
+            if comment_start == -1:
+                # No comment found
+                break
+            comment_end = self._find_comment_end(block, comment_start)
+            if comment_end != -1:
+                # Comment ends in this block
+                # Delete comment
+                block = block[:comment_start] + block[comment_end + 1 :]
+            else:
+                # Comment continues to next block(s)
+                block = block[:comment_start]
+                self._comment_spans = True
+                break
+        return block
+
+    def _decode_bitonal(self):
+        """
+        This is a separate method because in the plain PBM format, all data tokens are
+        exactly one byte, so the inter-token whitespace is optional.
+
+        :returns: One byte per pixel, 0xFF for a "0" token and 0x00 for a "1" token.
+            The result is shorter than the image if the file ends early.
+        :raises ValueError: If a data byte other than "0" or "1" is found.
+        """
+        data = bytearray()
+        total_bytes = self.state.xsize * self.state.ysize
+
+        while len(data) != total_bytes:
+            block = self._read_block()  # read next block
+            if not block:
+                # eof
+                break
+
+            block = self._ignore_comments(block)
+
+            tokens = b"".join(block.split())
+            for token in tokens:
+                # Iterating bytes yields ints: 48 is "0" and 49 is "1"
+                if token not in (48, 49):
+                    raise ValueError(f"Invalid token for this mode: {bytes([token])}")
+            data = (data + tokens)[:total_bytes]
+        invert = bytes.maketrans(b"01", b"\xFF\x00")
+        return data.translate(invert)
+
+    def _decode_blocks(self, maxval):
+        """
+        Decode whitespace-separated decimal tokens (plain PGM and PPM data).
+
+        Every value is rescaled from the range 0..``maxval`` to the full range of the
+        output mode: 65535 packed with ``o32`` for mode "I", otherwise 255 packed
+        with ``o8``.
+
+        :param maxval: The maximum channel value declared in the file header.
+        :returns: The packed pixel data. It is shorter than the image if the file
+            ends early.
+        :raises ValueError: If a token is longer than 10 bytes, is not an integer,
+            is negative or is greater than ``maxval``.
+        """
+        data = bytearray()
+        max_len = 10
+        out_byte_count = 4 if self.mode == "I" else 1
+        out_max = 65535 if self.mode == "I" else 255
+        bands = Image.getmodebands(self.mode)
+        total_bytes = self.state.xsize * self.state.ysize * bands * out_byte_count
+
+        half_token = b""
+        while len(data) != total_bytes:
+            block = self._read_block()  # read next block
+            if not block:
+                if half_token:
+                    block = bytearray(b" ")  # flush half_token
+                else:
+                    # eof
+                    break
+
+            block = self._ignore_comments(block)
+
+            if half_token:
+                block = half_token + block  # stitch half_token to new block
+                # The fragment is consumed now. Without this reset it would be
+                # prepended to every later block and flushed again at EOF.
+                half_token = b""
+
+            tokens = block.split()
+
+            if block and not block[-1:].isspace():  # block might split token
+                half_token = tokens.pop()  # save half token for later
+                if len(half_token) > max_len:  # prevent buildup of half_token
+                    raise ValueError(
+                        f"Token too long found in data: {half_token[:max_len + 1]}"
+                    )
+
+            for token in tokens:
+                if len(token) > max_len:
+                    raise ValueError(
+                        f"Token too long found in data: {token[:max_len + 1]}"
+                    )
+                value = int(token)
+                if value < 0:
+                    # int() accepts a leading minus sign, which would otherwise
+                    # wrap around when the value is packed
+                    raise ValueError(f"Channel value is negative: {value}")
+                if value > maxval:
+                    raise ValueError(f"Channel value too large for this mode: {value}")
+                value = round(value / maxval * out_max)
+                data += o32(value) if self.mode == "I" else o8(value)
+                if len(data) == total_bytes:  # finished!
+                    break
+        return data
+
+    def decode(self, buffer):
+        """
+        Decode the whole image in one call.
+
+        ``buffer`` is not used; the data is read from ``self.fd`` instead. The last
+        element of ``self.args`` is taken as maxval for modes other than "1".
+
+        :returns: ``(-1, 0)``, the same value ``PpmDecoder.decode`` returns.
+        """
+        self._comment_spans = False
+        if self.mode == "1":
+            data = self._decode_bitonal()
+            rawmode = "1;8"
+        else:
+            maxval = self.args[-1]
+            data = self._decode_blocks(maxval)
+            rawmode = "I;32" if self.mode == "I" else self.mode
+        self.set_as_raw(bytes(data), rawmode)
+        return -1, 0
+
+
 class PpmDecoder(ImageFile.PyDecoder):
    _pulls_fd = True

    def decode(self, buffer):
        data = bytearray()
-        maxval = min(self.args[-1], 65535)
+        maxval = self.args[-1]
        in_byte_count = 1 if maxval < 256 else 2
        out_byte_count = 4 if self.mode == "I" else 1
        out_max = 65535 if self.mode == "I" else 255
@ -156,7 +294,7 @@ class PpmDecoder(ImageFile.PyDecoder):
                value = min(out_max, round(value / maxval * out_max))
                data += o32(value) if self.mode == "I" else o8(value)
        rawmode = "I;32" if self.mode == "I" else self.mode
-        self.set_as_raw(bytes(data), (rawmode, 0, 1))
+        self.set_as_raw(bytes(data), rawmode)
        return -1, 0


@ -197,6 +335,7 @@ Image.register_open(PpmImageFile.format, PpmImageFile, _accept)
 Image.register_save(PpmImageFile.format, _save)

 Image.register_decoder("ppm", PpmDecoder)
+Image.register_decoder("ppm_plain", PpmPlainDecoder)

 Image.register_extensions(PpmImageFile.format, [".pbm", ".pgm", ".ppm", ".pnm"])