Speed up Gif save optimization step: don't do it as often and do it in C. Fixes #2093

2026-01-10 02:31:20 +03:00 · 2016-09-26 14:44:40 -07:00 · 2016-09-26 14:44:40 -07:00 · a51d3bcdba
commit a51d3bcdba
parent 2178da8ec2
2 changed files with 101 additions and 15 deletions
--- a/PIL/GifImagePlugin.py
+++ b/PIL/GifImagePlugin.py
@ -586,6 +586,10 @@ def _get_header_palette(palette_bytes):
        palette_bytes += o8(0) * 3 * actual_target_size_diff
    return palette_bytes

+# Force optimization so that we can test performance against
+# cases where it took lots of memory and time previously. 
+_FORCE_OPTIMIZE = False
+
 def _get_palette_bytes(im, palette, info):
    if im.mode == "P":
        if palette and isinstance(palette, bytes):
@ -603,32 +607,78 @@ def _get_palette_bytes(im, palette, info):
    if _get_optimize(im, info):
        used_palette_colors = _get_used_palette_colors(im)

-        # create the new palette if not every color is used
-        if len(used_palette_colors) < 256:
-            palette_bytes = b""
-            new_positions = {}
+        # Potentially expensive operation.
+
+        # The palette saves 3 bytes per color not used, but palette
+        # lengths are restricted to 3*(2**N) bytes. Max saving would
+        # be 768 -> 6 bytes if we went all the way down to 2 colors.
+        # * If we're over 128 colors, we can't save any space.
+        # * If there aren't any holes, it's not worth collapsing. 
+        # * If we have a 'large' image, the palette is in the noise.
+
+        # create the new palette if not every color is used
+        if _FORCE_OPTIMIZE or im.mode == 'L' or \
+               (len(used_palette_colors) <= 128 and  
+                max(used_palette_colors) > len(used_palette_colors) and
+                im.width * im.height < 512 * 512):
+            palette_bytes = b""
+            new_positions = [0]*256

-            i = 0
            # pick only the used colors from the palette
-            for oldPosition in used_palette_colors:
+            for i, oldPosition in enumerate(used_palette_colors):
                palette_bytes += source_palette[oldPosition*3:oldPosition*3+3]
                new_positions[oldPosition] = i
-                i += 1

            # replace the palette color id of all pixel with the new id
-            image_bytes = bytearray(im.tobytes())
-            for i in range(len(image_bytes)):
-                image_bytes[i] = new_positions[image_bytes[i]]
-            im.frombytes(bytes(image_bytes))
+
+            # Palette images are [0..255], mapped through a 1 or 3
+            # byte/color map.  We need to remap the whole image
+            # from palette 1 to palette 2. new_positions is indexed
+            # by palette 1 position and holds the matching palette 2
+            # position.  Palette 2 is palette 1 with any holes removed.
+
+            # We're going to leverage the convert mechanism to use the
+            # C code to remap the image from palette 1 to palette 2,
+            # by forcing a copy of the source image into 'P' mode with
+            # a mapping palette, then converting that copy to 'L',
+            # thus converting the image bytes, then assigning the
+            # optimized RGB palette.
+
+            # perf reference, 9500x4000 gif, w/~135 colors
+            # 14 sec prepatch, 1 sec postpatch with optimization forced.
+
+            mapping_palette = bytearray(new_positions)
+
+            m_im = im.copy()
+            m_im.mode = 'P'
+
+            m_im.palette = ImagePalette.ImagePalette("RGB",
+                                                   palette=mapping_palette*3,
+                                                   size=768)
+            # Alternative: possibly set the palette dirty, then call
+            # m_im.putpalette(mapping_palette, 'L')  # converts to 'P'
+            # or just force it, as done below.
+            # TODO -- this is part of the general issue with palettes
+            m_im.im.putpalette(*m_im.palette.getdata())
+            
+            m_im = m_im.convert('L')
+         
+            # Internally, we require 768 bytes for a palette. 
            new_palette_bytes = (palette_bytes +
                                 (768 - len(palette_bytes)) * b'\x00')
-            im.putpalette(new_palette_bytes)
-            im.palette = ImagePalette.ImagePalette("RGB",
+            m_im.putpalette(new_palette_bytes)
+            m_im.palette = ImagePalette.ImagePalette("RGB",
                                                   palette=palette_bytes,
                                                   size=len(palette_bytes))

+            # NOTE: this modifies the caller's image in place so that the
+            # remapped pixels are passed back by reference. TODO: refactor soon.
+            im.frombytes(m_im.tobytes())
+            
    if not palette_bytes:
        palette_bytes = source_palette
+
+    # returning palette, _not_ padded to 768 bytes like our internal ones.
    return palette_bytes, used_palette_colors

 def getheader(im, palette=None, info=None):
--- a/Tests/test_file_gif.py
+++ b/Tests/test_file_gif.py
@ -3,6 +3,8 @@ from helper import unittest, PillowTestCase, hopper, netpbm_available
 from PIL import Image
 from PIL import GifImagePlugin

+from io import BytesIO
+
 codecs = dir(Image.core)

 # sample gif stream
@ -33,8 +35,6 @@ class TestFileGif(PillowTestCase):
                          lambda: GifImagePlugin.GifImageFile(invalid_file))

    def test_optimize(self):
-        from io import BytesIO
-
        def test_grayscale(optimize):
            im = Image.new("L", (1, 1), 0)
            filename = BytesIO()
@ -52,6 +52,42 @@ class TestFileGif(PillowTestCase):
        self.assertEqual(test_bilevel(0), 800)
        self.assertEqual(test_bilevel(1), 800)

+    def test_optimize_correctness(self):
+        # Palette images whose used colors sit at the top of the palette,
+        # at sizes just under and just over 512x512. Check the number of
+        # palette entries kept and that pixels survive a round trip to RGB.
+        def check(colors, size, expected_palette_length):
+            # make an image with empty colors in the start of the palette range
+            # (list() is required: a Python 3 range cannot be multiplied)
+            data = bytes(bytearray(list(range(256-colors, 256))*colors))
+            im = Image.frombytes('P', (colors, colors), data)
+            im = im.resize((size, size))
+            outfile = BytesIO()
+            im.save(outfile, 'GIF')
+            outfile.seek(0)
+            reloaded = Image.open(outfile)
+
+            # check palette length: highest palette index in use, plus one
+            palette_length = max(i+1 for i, v in enumerate(reloaded.histogram()) if v)
+            self.assertEqual(expected_palette_length, palette_length)
+
+            self.assert_image_equal(im.convert('RGB'), reloaded.convert('RGB'))
+
+        # These do optimize the palette
+        check(128, 511, 128)
+        check(64, 511, 64)
+        check(4, 511, 4)
+
+        # These don't optimize the palette
+        check(128, 513, 256)
+        check(64, 513, 256)
+        check(4, 513, 256)
+
+        # other limits that don't optimize the palette
+        check(129, 511, 256)
+        check(255, 511, 256)
+        check(256, 511, 256)
+
    def test_optimize_full_l(self):
        from io import BytesIO