diff --git a/CHANGES.rst b/CHANGES.rst
index a0253def0..16f8234c7 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -4,6 +4,9 @@ Changelog (Pillow)
 2.6.0 (unreleased)
 ------------------
 
+- Added support for encoding and decoding iTXt chunks #818
+  [dolda2000]
+
 - HSV Support #816
   [wiredfool]
 
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 000000000..e6f4c3f9d
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,7 @@
+When reporting bugs, please include example code that reproduces the issue, and if possible a problem image.
+
+Let us know:
+
+ * What is the expected output? What do you see instead?
+
+ * What versions of Pillow and Python are you using?
diff --git a/PIL/PngImagePlugin.py b/PIL/PngImagePlugin.py
index e794ef702..4dbedb783 100644
--- a/PIL/PngImagePlugin.py
+++ b/PIL/PngImagePlugin.py
@@ -147,6 +147,17 @@ class ChunkStream:
 
         return cids
 
+# --------------------------------------------------------------------
+# Subclass of string to allow iTXt chunks to look like strings while
+# keeping their extra information
+
+class iTXt(str):
+    @staticmethod
+    def __new__(cls, text, lang, tkey):
+        self = str.__new__(cls, text)
+        self.lang = lang
+        self.tkey = tkey
+        return self
 
 # --------------------------------------------------------------------
 # PNG chunk container (for use with save(pnginfo=))
@@ -159,14 +170,36 @@ class PngInfo:
     def add(self, cid, data):
         self.chunks.append((cid, data))
 
+    def add_itxt(self, key, value, lang="", tkey="", zip=False):
+        if not isinstance(key, bytes):
+            key = key.encode("latin-1", "strict")
+        if not isinstance(value, bytes):
+            value = value.encode("utf-8", "strict")
+        if not isinstance(lang, bytes):
+            lang = lang.encode("utf-8", "strict")
+        if not isinstance(tkey, bytes):
+            tkey = tkey.encode("utf-8", "strict")
+
+        if zip:
+            import zlib
+            self.add(b"iTXt", key + b"\0\x01\0" + lang + b"\0" + tkey + b"\0" + zlib.compress(value))
+        else:
+            self.add(b"iTXt", key + b"\0\0\0" + lang + b"\0" + tkey + b"\0" + value)
+
     def add_text(self, key, value, zip=0):
+        if isinstance(value, iTXt):
+            return self.add_itxt(key, value, value.lang, value.tkey, bool(zip))
+
         # The tEXt chunk stores latin-1 text
+        if not isinstance(value, bytes):
+            try:
+                value = value.encode('latin-1', 'strict')
+            except UnicodeError:
+                return self.add_itxt(key, value, zip=bool(zip))
+
         if not isinstance(key, bytes):
             key = key.encode('latin-1', 'strict')
 
-        if not isinstance(value, bytes):
-            value = value.encode('latin-1', 'replace')
-
         if zip:
             import zlib
             self.add(b"zTXt", key + b"\0\0" + zlib.compress(value))
@@ -329,6 +362,43 @@ class PngStream(ChunkStream):
         self.im_info[k] = self.im_text[k] = v
         return s
 
+    def chunk_iTXt(self, pos, length):
+
+        # international text
+        r = s = ImageFile._safe_read(self.fp, length)
+        try:
+            k, r = r.split(b"\0", 1)
+        except ValueError:
+            return s
+        if len(r) < 2:
+            return s
+        cf, cm, r = i8(r[0]), i8(r[1]), r[2:]
+        try:
+            lang, tk, v = r.split(b"\0", 2)
+        except ValueError:
+            return s
+        if cf != 0:
+            if cm == 0:
+                import zlib
+                try:
+                    v = zlib.decompress(v)
+                except zlib.error:
+                    return s
+            else:
+                return s
+        if bytes is not str:
+            try:
+                k = k.decode("latin-1", "strict")
+                lang = lang.decode("utf-8", "strict")
+                tk = tk.decode("utf-8", "strict")
+                v = v.decode("utf-8", "strict")
+            except UnicodeError:
+                return s
+
+        self.im_info[k] = self.im_text[k] = iTXt(v, lang, tk)
+
+        return s
+
 
 # --------------------------------------------------------------------
 # PNG reader
diff --git a/Tests/test_file_png.py b/Tests/test_file_png.py
index de96fdf3e..8ef166347 100644
--- a/Tests/test_file_png.py
+++ b/Tests/test_file_png.py
@@ -129,6 +129,39 @@ class TestFilePng(PillowTestCase):
             HEAD + chunk(b'zTXt', b'spam\0\0' + zlib.compress(b'egg')) + TAIL)
         self.assertEqual(im.info, {'spam': 'egg'})
 
+    def test_bad_itxt(self):
+
+        im = load(HEAD + chunk(b'iTXt') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\x02') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\0\0foo\0') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\0\0en\0Spam\0egg') + TAIL)
+        self.assertEqual(im.info, {"spam": "egg"})
+        self.assertEqual(im.info["spam"].lang, "en")
+        self.assertEqual(im.info["spam"].tkey, "Spam")
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\1\0en\0Spam\0' + zlib.compress(b"egg")[:1]) + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\1\1en\0Spam\0' + zlib.compress(b"egg")) + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\1\0en\0Spam\0' + zlib.compress(b"egg")) + TAIL)
+        self.assertEqual(im.info, {"spam": "egg"})
+        self.assertEqual(im.info["spam"].lang, "en")
+        self.assertEqual(im.info["spam"].tkey, "Spam")
+
     def test_interlace(self):
 
         file = "Tests/images/pil123p.png"
@@ -232,6 +265,50 @@ class TestFilePng(PillowTestCase):
         self.assertEqual(im.info, {'TXT': 'VALUE', 'ZIP': 'VALUE'})
         self.assertEqual(im.text, {'TXT': 'VALUE', 'ZIP': 'VALUE'})
 
+    def test_roundtrip_itxt(self):
+        # Check iTXt roundtripping
+
+        im = Image.new("RGB", (32, 32))
+        info = PngImagePlugin.PngInfo()
+        info.add_itxt("spam", "Eggs", "en", "Spam")
+        info.add_text("eggs", PngImagePlugin.iTXt("Spam", "en", "Eggs"), zip=True)
+
+        im = roundtrip(im, pnginfo=info)
+        self.assertEqual(im.info, {"spam": "Eggs", "eggs": "Spam"})
+        self.assertEqual(im.text, {"spam": "Eggs", "eggs": "Spam"})
+        self.assertEqual(im.text["spam"].lang, "en")
+        self.assertEqual(im.text["spam"].tkey, "Spam")
+        self.assertEqual(im.text["eggs"].lang, "en")
+        self.assertEqual(im.text["eggs"].tkey, "Eggs")
+
+    def test_nonunicode_text(self):
+        # Check so that non-Unicode text is saved as a tEXt rather than iTXt
+
+        im = Image.new("RGB", (32, 32))
+        info = PngImagePlugin.PngInfo()
+        info.add_text("Text", "Ascii")
+        im = roundtrip(im, pnginfo=info)
+        self.assertEqual(type(im.info["Text"]), str)
+
+    def test_unicode_text(self):
+        # Check preservation of non-ASCII characters on Python3
+        # This cannot really be meaningfully tested on Python2,
+        # since it didn't preserve charsets to begin with.
+
+        def rt_text(value):
+            im = Image.new("RGB", (32, 32))
+            info = PngImagePlugin.PngInfo()
+            info.add_text("Text", value)
+            im = roundtrip(im, pnginfo=info)
+            self.assertEqual(im.info, {"Text": value})
+
+        if str is not bytes:
+            rt_text(" Aa" + chr(0xa0) + chr(0xc4) + chr(0xff))  # Latin1
+            rt_text(chr(0x400) + chr(0x472) + chr(0x4ff))  # Cyrillic
+            rt_text(chr(0x4e00) + chr(0x66f0) +  # CJK
+                    chr(0x9fba) + chr(0x3042) + chr(0xac00))
+            rt_text("A" + chr(0xc4) + chr(0x472) + chr(0x3042))  # Combined
+
     def test_scary(self):
         # Check reading of evil PNG file. For information, see:
         # http://scary.beasts.org/security/CESA-2004-001.txt
diff --git a/docs/handbook/concepts.rst b/docs/handbook/concepts.rst
index 93f964e41..b5e5e44c1 100644
--- a/docs/handbook/concepts.rst
+++ b/docs/handbook/concepts.rst
@@ -27,6 +27,8 @@ image. The current release supports the following standard modes:
     * ``RGBA`` (4x8-bit pixels, true color with transparency mask)
     * ``CMYK`` (4x8-bit pixels, color separation)
     * ``YCbCr`` (3x8-bit pixels, color video format)
+    * ``LAB`` (3x8-bit pixels, the L*a*b color space)
+    * ``HSV`` (3x8-bit pixels, Hue, Saturation, Value color space)
     * ``I`` (32-bit signed integer pixels)
     * ``F`` (32-bit floating point pixels)
 
@@ -34,7 +36,7 @@ PIL also provides limited support for a few special modes, including ``LA`` (L
 with alpha), ``RGBX`` (true color with padding) and ``RGBa`` (true color with
 premultiplied alpha). However, PIL doesn’t support user-defined modes; if you to
 handle band combinations that are not listed above, use a sequence of Image
-objects.
+objects.
 
 You can read the mode of an image through the :py:attr:`~PIL.Image.Image.mode`
 attribute. This is a string containing one of the above values.