Merge pull request #647 from hugovk/house

Fix for junk JPEG data
2025-12-14 13:44:18 +03:00 · 2014-05-23 00:06:09 +01:00 · 2014-05-23 00:06:09 +01:00 · cd17b66541
commit cd17b66541
parent 24cb7bf3df 8a7974c288
3 changed files with 105 additions and 56 deletions
--- a/PIL/JpegImagePlugin.py
+++ b/PIL/JpegImagePlugin.py
@ -34,7 +34,8 @@
 __version__ = "0.6"
-import array, struct
+import array
 import struct
 from PIL import Image, ImageFile, _binary
 from PIL.JpegPresets import presets
 from PIL._util import isStringType
@ -44,6 +45,7 @@ o8 = _binary.o8
 i16 = _binary.i16be
 i32 = _binary.i32be
 #
 # Parser
@ -51,6 +53,7 @@ def Skip(self, marker):
    n = i16(self.fp.read(2))-2
    ImageFile._safe_read(self.fp, n)
 def APP(self, marker):
    #
    # Application marker.  Store these in the APP dictionary.
@ -59,14 +62,14 @@ def APP(self, marker):
    n = i16(self.fp.read(2))-2
    s = ImageFile._safe_read(self.fp, n)
-    app = "APP%d" % (marker&15)
+    app = "APP%d" % (marker & 15)
-    self.app[app] = s # compatibility
+    self.app[app] = s  # compatibility
    self.applist.append((app, s))
    if marker == 0xFFE0 and s[:4] == b"JFIF":
        # extract JFIF information
-        self.info["jfif"] = version = i16(s, 5) # version
+        self.info["jfif"] = version = i16(s, 5)  # version
        self.info["jfif_version"] = divmod(version, 256)
        # extract JFIF properties
        try:
@ -81,10 +84,10 @@ def APP(self, marker):
            self.info["jfif_density"] = jfif_density
    elif marker == 0xFFE1 and s[:5] == b"Exif\0":
        # extract Exif information (incomplete)
-        self.info["exif"] = s # FIXME: value will change
+        self.info["exif"] = s  # FIXME: value will change
    elif marker == 0xFFE2 and s[:5] == b"FPXR\0":
        # extract FlashPix information (incomplete)
-        self.info["flashpix"] = s # FIXME: value will change
+        self.info["flashpix"] = s  # FIXME: value will change
    elif marker == 0xFFE2 and s[:12] == b"ICC_PROFILE\0":
        # Since an ICC profile can be larger than the maximum size of
        # a JPEG marker (64K), we need provisions to split it into
@ -108,16 +111,17 @@ def APP(self, marker):
        else:
            self.info["adobe_transform"] = adobe_transform
 def COM(self, marker):
    #
    # Comment marker.  Store these in the APP dictionary.
    n = i16(self.fp.read(2))-2
    s = ImageFile._safe_read(self.fp, n)
-    self.app["COM"] = s # compatibility
+    self.app["COM"] = s  # compatibility
    self.applist.append(("COM", s))
 def SOF(self, marker):
    #
    # Start of frame marker.  Defines the size and mode of the
@ -149,21 +153,22 @@ def SOF(self, marker):
    if self.icclist:
        # fixup icc profile
-        self.icclist.sort() # sort by sequence number
+        self.icclist.sort()  # sort by sequence number
        if i8(self.icclist[0][13]) == len(self.icclist):
            profile = []
            for p in self.icclist:
                profile.append(p[14:])
            icc_profile = b"".join(profile)
        else:
-            icc_profile = None # wrong number of fragments
+            icc_profile = None  # wrong number of fragments
        self.info["icc_profile"] = icc_profile
        self.icclist = None
    for i in range(6, len(s), 3):
        t = s[i:i+3]
        # 4-tuples: id, vsamp, hsamp, qtable
-        self.layer.append((t[0], i8(t[1])//16, i8(t[1])&15, i8(t[2])))
+        self.layer.append((t[0], i8(t[1])//16, i8(t[1]) & 15, i8(t[2])))
 def DQT(self, marker):
    #
@ -181,10 +186,10 @@ def DQT(self, marker):
            raise SyntaxError("bad quantization table marker")
        v = i8(s[0])
        if v//16 == 0:
-            self.quantization[v&15] = array.array("b", s[1:65])
+            self.quantization[v & 15] = array.array("b", s[1:65])
            s = s[65:]
        else:
-            return # FIXME: add code to read 16-bit tables!
+            return  # FIXME: add code to read 16-bit tables!
            # raise SyntaxError, "bad quantization table element size"
@ -261,6 +266,7 @@ MARKER = {
 def _accept(prefix):
    return prefix[0:1] == b"\377"
 ##
 # Image plugin for JPEG and JFIF images.
@ -284,32 +290,37 @@ class JpegImageFile(ImageFile.ImageFile):
        self.huffman_dc = {}
        self.huffman_ac = {}
        self.quantization = {}
-        self.app = {} # compatibility
+        self.app = {}  # compatibility
        self.applist = []
        self.icclist = []
        while True:
-            s = s + self.fp.read(1)
+            i = i8(s)
-
+            if i == 0xFF:
-            i = i16(s)
+                s = s + self.fp.read(1)
                i = i16(s)
            else:
                # Skip non-0xFF junk
                s = b"\xff"
                continue
            if i in MARKER:
                name, description, handler = MARKER[i]
                # print hex(i), name, description
                if handler is not None:
                    handler(self, i)
-                if i == 0xFFDA: # start of scan
+                if i == 0xFFDA:  # start of scan
                    rawmode = self.mode
                    if self.mode == "CMYK":
-                        rawmode = "CMYK;I" # assume adobe conventions
+                        rawmode = "CMYK;I"  # assume adobe conventions
-                    self.tile = [("jpeg", (0,0) + self.size, 0, (rawmode, ""))]
+                    self.tile = [("jpeg", (0, 0) + self.size, 0, (rawmode, ""))]
                    # self.__offset = self.fp.tell()
                    break
                s = self.fp.read(1)
-            elif i == 0 or i == 65535:
+            elif i == 0 or i == 0xFFFF:
                # padded marker or junk; move on
-                s = "\xff"
+                s = b"\xff"
            else:
                raise SyntaxError("no marker found")
@ -343,7 +354,8 @@ class JpegImageFile(ImageFile.ImageFile):
        # ALTERNATIVE: handle JPEGs via the IJG command line utilities
-        import tempfile, os
+        import tempfile
        import os
        f, path = tempfile.mkstemp()
        os.close(f)
        if os.path.exists(self.filename):
@ -354,8 +366,10 @@ class JpegImageFile(ImageFile.ImageFile):
        try:
            self.im = Image.core.open_ppm(path)
        finally:
-            try: os.unlink(path)
+            try:
-            except: pass
+                os.unlink(path)
            except:
                pass
        self.mode = self.im.mode
        self.size = self.im.size
@ -372,6 +386,7 @@ def _getexif(self):
    # version.
    from PIL import TiffImagePlugin
    import io
    def fixup(value):
        if len(value) == 1:
            return value[0]
@ -422,7 +437,7 @@ RAWMODE = {
    "RGB": "RGB",
    "RGBA": "RGB",
    "RGBX": "RGB",
-    "CMYK": "CMYK;I", # assume adobe conventions
+    "CMYK": "CMYK;I",  # assume adobe conventions
    "YCbCr": "YCbCr",
 }
@ -441,16 +456,19 @@ samplings = {
             (2, 2, 1, 1, 1, 1): 2,
            }
 def convert_dict_qtables(qtables):
    # Turn a dict of quantization tables keyed by table number into a
    # list.  Only keys in range(len(qtables)) are considered, in
    # ascending order; each table is then re-read through zigzag_index.
    ordered = [qtables[key] for key in range(len(qtables)) if key in qtables]
    return [[table[i] for i in zigzag_index] for table in ordered]
 def get_sampling(im):
    #
    # Return the subsampling code for an opened JPEG image, or -1 when
    # it cannot be determined.
    #
    # Each entry of im.layer is an (id, vsamp, hsamp, qtable) tuple; the
    # (vsamp, hsamp) pairs of the first three entries are concatenated
    # and looked up in the samplings table.
    layers = im.layer
    if len(layers) < 3:
        # Fewer than three components (e.g. a single-component frame):
        # there is no sampling triple to look up, so report "unknown"
        # instead of failing with IndexError.
        return -1
    sampling = layers[0][1:3] + layers[1][1:3] + layers[2][1:3]
    return samplings.get(sampling, -1)
 def _save(im, fp, filename):
    try:
@ -563,12 +581,11 @@ def _save(im, fp, filename):
        info.get("exif", b"")
        )
-
+    # if we optimize, libjpeg needs a buffer big enough to hold the whole image
-    # if we optimize, libjpeg needs a buffer big enough to hold the whole image in a shot.
+    # in a shot. Guessing on the size, at im.size bytes. (raw pizel size is
-    # Guessing on the size, at im.size bytes. (raw pizel size is channels*size, this
+    # channels*size, this is a value that's been used in a django patch.
    # is a value that's been used in a django patch.
    # https://github.com/jdriscoll/django-imagekit/issues/50
-    bufsize=0
+    bufsize = 0
    if "optimize" in info or "progressive" in info or "progression" in info:
        if quality >= 95:
            bufsize = 2 * im.size[0] * im.size[1]
@ -577,17 +594,20 @@ def _save(im, fp, filename):
    # The exif info needs to be written as one block, + APP1, + one spare byte.
    # Ensure that our buffer is big enough
-    bufsize = max(ImageFile.MAXBLOCK, bufsize, len(info.get("exif",b"")) + 5 )
+    bufsize = max(ImageFile.MAXBLOCK, bufsize, len(info.get("exif", b"")) + 5)
    ImageFile._save(im, fp, [("jpeg", (0, 0)+im.size, 0, rawmode)], bufsize)
    ImageFile._save(im, fp, [("jpeg", (0,0)+im.size, 0, rawmode)], bufsize)
 def _save_cjpeg(im, fp, filename):
    # ALTERNATIVE: handle JPEGs via the IJG command line utilities.
    import os
    file = im._dump()
    os.system("cjpeg %s >%s" % (file, filename))
-    try: os.unlink(file)
+    try:
-    except: pass
+        os.unlink(file)
    except:
        pass
 # --------------------------------------------------------------------
 # Registry stuff
--- a/Tests/images/junk_jpeg_header.jpg
+++ b/Tests/images/junk_jpeg_header.jpg
--- a/Tests/test_file_jpeg.py
+++ b/Tests/test_file_jpeg.py
@ -12,17 +12,19 @@ if "jpeg_encoder" not in codecs or "jpeg_decoder" not in codecs:
 test_file = "Images/lena.jpg"
 def roundtrip(im, **options):
    out = BytesIO()
    im.save(out, "JPEG", **options)
    bytes = out.tell()
    out.seek(0)
    im = Image.open(out)
-    im.bytes = bytes # for testing only
+    im.bytes = bytes  # for testing only
    return im
 # --------------------------------------------------------------------
 def test_sanity():
    # internal version number
@ -34,6 +36,7 @@ def test_sanity():
    assert_equal(im.size, (128, 128))
    assert_equal(im.format, "JPEG")
 # --------------------------------------------------------------------
 def test_app():
@ -44,6 +47,7 @@ def test_app():
    assert_equal(im.applist[1], ("COM", b"Python Imaging Library"))
    assert_equal(len(im.applist), 2)
 def test_cmyk():
    # Test CMYK handling.  Thanks to Tim and Charlie for test data,
    # Michael for getting me to look one more time.
@ -62,6 +66,7 @@ def test_cmyk():
    c, m, y, k = [x / 255.0 for x in im.getpixel((im.size[0]-1, im.size[1]-1))]
    assert_true(k > 0.9)
 def test_dpi():
    def test(xdpi, ydpi=None):
        im = Image.open(test_file)
@ -70,7 +75,8 @@ def test_dpi():
    assert_equal(test(72), (72, 72))
    assert_equal(test(300), (300, 300))
    assert_equal(test(100, 200), (100, 200))
-    assert_equal(test(0), None) # square pixels
+    assert_equal(test(0), None)  # square pixels
 def test_icc():
    # Test ICC support
@ -89,6 +95,7 @@ def test_icc():
    assert_false(im1.info.get("icc_profile"))
    assert_true(im2.info.get("icc_profile"))
 def test_icc_big():
    # Make sure that the "extra" support handles large blocks
    def test(n):
@ -96,16 +103,20 @@ def test_icc_big():
        # using a 4-byte test code should allow us to detect out of
        # order issues.
        icc_profile = (b"Test"*int(n/4+1))[:n]
-        assert len(icc_profile) == n # sanity
+        assert len(icc_profile) == n  # sanity
        im1 = roundtrip(lena(), icc_profile=icc_profile)
        assert_equal(im1.info.get("icc_profile"), icc_profile or None)
-    test(0); test(1)
+    test(0)
-    test(3); test(4); test(5)
+    test(1)
-    test(65533-14) # full JPEG marker block
+    test(3)
-    test(65533-14+1) # full block plus one byte
+    test(4)
-    test(ImageFile.MAXBLOCK) # full buffer block
+    test(5)
-    test(ImageFile.MAXBLOCK+1) # full buffer block plus one byte
+    test(65533-14)  # full JPEG marker block
-    test(ImageFile.MAXBLOCK*4+3) # large block
+    test(65533-14+1)  # full block plus one byte
    test(ImageFile.MAXBLOCK)  # full buffer block
    test(ImageFile.MAXBLOCK+1)  # full buffer block plus one byte
    test(ImageFile.MAXBLOCK*4+3)  # large block
 def test_optimize():
    im1 = roundtrip(lena())
@ -113,25 +124,29 @@ def test_optimize():
    assert_image_equal(im1, im2)
    assert_true(im1.bytes >= im2.bytes)
 def test_optimize_large_buffer():
-    #https://github.com/python-imaging/Pillow/issues/148
+    # https://github.com/python-imaging/Pillow/issues/148
    f = tempfile('temp.jpg')
    # this requires ~ 1.5x Image.MAXBLOCK
-    im = Image.new("RGB", (4096,4096), 0xff3333)
+    im = Image.new("RGB", (4096, 4096), 0xff3333)
    im.save(f, format="JPEG", optimize=True)
 def test_progressive():
    im1 = roundtrip(lena())
    im2 = roundtrip(lena(), progressive=True)
    assert_image_equal(im1, im2)
    assert_true(im1.bytes >= im2.bytes)
 def test_progressive_large_buffer():
    f = tempfile('temp.jpg')
    # this requires ~ 1.5x Image.MAXBLOCK
-    im = Image.new("RGB", (4096,4096), 0xff3333)
+    im = Image.new("RGB", (4096, 4096), 0xff3333)
    im.save(f, format="JPEG", progressive=True)
 def test_progressive_large_buffer_highest_quality():
    f = tempfile('temp.jpg')
    if py3:
@ -142,16 +157,18 @@ def test_progressive_large_buffer_highest_quality():
    # this requires more bytes than pixels in the image
    im.save(f, format="JPEG", progressive=True, quality=100)
 def test_large_exif():
-    #https://github.com/python-imaging/Pillow/issues/148
+    # https://github.com/python-imaging/Pillow/issues/148
    f = tempfile('temp.jpg')
    im = lena()
-    im.save(f,'JPEG', quality=90, exif=b"1"*65532)
+    im.save(f, 'JPEG', quality=90, exif=b"1"*65532)
 def test_progressive_compat():
    im1 = roundtrip(lena())
    im2 = roundtrip(lena(), progressive=1)
-    im3 = roundtrip(lena(), progression=1) # compatibility
+    im3 = roundtrip(lena(), progression=1)  # compatibility
    assert_image_equal(im1, im2)
    assert_image_equal(im1, im3)
    assert_false(im1.info.get("progressive"))
@ -161,31 +178,34 @@ def test_progressive_compat():
    assert_true(im3.info.get("progressive"))
    assert_true(im3.info.get("progression"))
 def test_quality():
    # Compare a default save against a quality=50 save: the default
    # result is checked against the reduced one's mode and size, and
    # the reduced file must not be larger in bytes.
    default = roundtrip(lena())
    reduced = roundtrip(lena(), quality=50)
    assert_image(default, reduced.mode, reduced.size)
    assert_true(default.bytes >= reduced.bytes)
 def test_smooth():
    # Saving with smooth=100 is checked against a default save by
    # comparing the default result with the smoothed mode and size.
    plain = roundtrip(lena())
    smoothed = roundtrip(lena(), smooth=100)
    assert_image(plain, smoothed.mode, smoothed.size)
 def test_subsampling():
    def getsampling(im):
        layer = im.layer
        return layer[0][1:3] + layer[1][1:3] + layer[2][1:3]
    # experimental API
-    im = roundtrip(lena(), subsampling=-1) # default
+    im = roundtrip(lena(), subsampling=-1)  # default
    assert_equal(getsampling(im), (2, 2, 1, 1, 1, 1))
-    im = roundtrip(lena(), subsampling=0) # 4:4:4
+    im = roundtrip(lena(), subsampling=0)  # 4:4:4
    assert_equal(getsampling(im), (1, 1, 1, 1, 1, 1))
-    im = roundtrip(lena(), subsampling=1) # 4:2:2
+    im = roundtrip(lena(), subsampling=1)  # 4:2:2
    assert_equal(getsampling(im), (2, 1, 1, 1, 1, 1))
-    im = roundtrip(lena(), subsampling=2) # 4:1:1
+    im = roundtrip(lena(), subsampling=2)  # 4:1:1
    assert_equal(getsampling(im), (2, 2, 1, 1, 1, 1))
-    im = roundtrip(lena(), subsampling=3) # default (undefined)
+    im = roundtrip(lena(), subsampling=3)  # default (undefined)
    assert_equal(getsampling(im), (2, 2, 1, 1, 1, 1))
    im = roundtrip(lena(), subsampling="4:4:4")
@ -197,6 +217,7 @@ def test_subsampling():
    assert_exception(TypeError, lambda: roundtrip(lena(), subsampling="1:1:1"))
 def test_exif():
    im = Image.open("Tests/images/pil_sample_rgb.jpg")
    info = im._getexif()
@ -207,3 +228,11 @@ def test_quality_keep():
    im = Image.open("Images/lena.jpg")
    f = tempfile('temp.jpg')
    assert_no_exception(lambda: im.save(f, quality='keep'))
 def test_junk_jpeg_header():
    # Regression test: opening the sample file with a junk JPEG header
    # must not raise.
    # https://github.com/python-imaging/Pillow/issues/630
    junk_header_path = "Tests/images/junk_jpeg_header.jpg"
    assert_no_exception(lambda: Image.open(junk_header_path))
 # End of file