From c469dd9ae56961c3f7ee7289a380c43d90ad57d5 Mon Sep 17 00:00:00 2001
From: Fredrik Tolf
Date: Wed, 23 Jul 2014 00:24:18 -0700
Subject: [PATCH 1/5] Added support for encoding and decoding iTXt chunks.

---
 PIL/PngImagePlugin.py | 67 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/PIL/PngImagePlugin.py b/PIL/PngImagePlugin.py
index e794ef702..34fff1a31 100644
--- a/PIL/PngImagePlugin.py
+++ b/PIL/PngImagePlugin.py
@@ -147,6 +147,17 @@ class ChunkStream:
 
         return cids
 
+# --------------------------------------------------------------------
+# Subclass of string to allow iTXt chunks to look like strings while
+# keeping their extra information
+
+class iTXt(str):
+    @staticmethod
+    def __new__(cls, text, lang, tkey):
+        self = str.__new__(cls, text)
+        self.lang = lang
+        self.tkey = tkey
+        return self
 
 # --------------------------------------------------------------------
 # PNG chunk container (for use with save(pnginfo=))
@@ -159,7 +170,26 @@ class PngInfo:
     def add(self, cid, data):
         self.chunks.append((cid, data))
 
+    def add_itxt(self, key, value, lang="", tkey="", zip=False):
+        if not isinstance(key, bytes):
+            key = key.encode("latin-1", "strict")
+        if not isinstance(value, bytes):
+            value = value.encode("utf-8", "strict")
+        if not isinstance(lang, bytes):
+            lang = lang.encode("utf-8", "strict")
+        if not isinstance(tkey, bytes):
+            tkey = tkey.encode("utf-8", "strict")
+
+        if zip:
+            import zlib
+            self.add(b"iTXt", key + b"\0\x01\0" + lang + b"\0" + tkey + b"\0" + zlib.compress(value))
+        else:
+            self.add(b"iTXt", key + b"\0\0\0" + lang + b"\0" + tkey + b"\0" + value)
+
     def add_text(self, key, value, zip=0):
+        if isinstance(value, iTXt):
+            return self.add_itxt(key, value, value.lang, value.tkey, bool(zip))
+
         # The tEXt chunk stores latin-1 text
         if not isinstance(key, bytes):
             key = key.encode('latin-1', 'strict')
@@ -329,6 +359,43 @@ class PngStream(ChunkStream):
             self.im_info[k] = self.im_text[k] = v
         return s
 
+    def chunk_iTXt(self, pos, length):
+
+        # international text
+        r = s = ImageFile._safe_read(self.fp, length)
+        try:
+            k, r = r.split(b"\0", 1)
+        except ValueError:
+            return s
+        if len(r) < 2:
+            return s
+        cf, cm, r = i8(r[0]), i8(r[1]), r[2:]
+        try:
+            lang, tk, v = r.split(b"\0", 2)
+        except ValueError:
+            return s
+        if cf != 0:
+            if cm == 0:
+                import zlib
+                try:
+                    v = zlib.decompress(v)
+                except zlib.error:
+                    return s
+            else:
+                return s
+        if bytes is not str:
+            try:
+                k = k.decode("latin-1", "strict")
+                lang = lang.decode("utf-8", "strict")
+                tk = tk.decode("utf-8", "strict")
+                v = v.decode("utf-8", "strict")
+            except UnicodeError:
+                return s
+
+        self.im_info[k] = self.im_text[k] = iTXt(v, lang, tk)
+
+        return s
+
 # --------------------------------------------------------------------
 # PNG reader
 

From 2b4d91ed531f7b4a4cd9b266d98ad9d32796c333 Mon Sep 17 00:00:00 2001
From: Fredrik Tolf
Date: Wed, 23 Jul 2014 01:09:06 -0700
Subject: [PATCH 2/5] Added iTXt tests.

---
 Tests/test_file_png.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/Tests/test_file_png.py b/Tests/test_file_png.py
index de96fdf3e..9d20ba2ab 100644
--- a/Tests/test_file_png.py
+++ b/Tests/test_file_png.py
@@ -129,6 +129,39 @@ class TestFilePng(PillowTestCase):
             HEAD + chunk(b'zTXt', b'spam\0\0' + zlib.compress(b'egg')) + TAIL)
         self.assertEqual(im.info, {'spam': 'egg'})
 
+    def test_bad_itxt(self):
+
+        im = load(HEAD + chunk(b'iTXt') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\x02') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\0\0foo\0') + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\0\0en\0Spam\0egg') + TAIL)
+        self.assertEqual(im.info, {"spam": "egg"})
+        self.assertEqual(im.info["spam"].lang, "en")
+        self.assertEqual(im.info["spam"].tkey, "Spam")
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\1\0en\0Spam\0' + zlib.compress(b"egg")[:1]) + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\1\1en\0Spam\0' + zlib.compress(b"egg")) + TAIL)
+        self.assertEqual(im.info, {})
+
+        im = load(HEAD + chunk(b'iTXt', b'spam\0\1\0en\0Spam\0' + zlib.compress(b"egg")) + TAIL)
+        self.assertEqual(im.info, {"spam": "egg"})
+        self.assertEqual(im.info["spam"].lang, "en")
+        self.assertEqual(im.info["spam"].tkey, "Spam")
+
     def test_interlace(self):
 
         file = "Tests/images/pil123p.png"

From 823d377e4751335be16fcf03dc85236360725c7f Mon Sep 17 00:00:00 2001
From: Fredrik Tolf
Date: Wed, 23 Jul 2014 07:27:51 -0700
Subject: [PATCH 3/5] Added tests for iTXt saving.

---
 Tests/test_file_png.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Tests/test_file_png.py b/Tests/test_file_png.py
index 9d20ba2ab..fbccfb9f6 100644
--- a/Tests/test_file_png.py
+++ b/Tests/test_file_png.py
@@ -265,6 +265,22 @@ class TestFilePng(PillowTestCase):
         self.assertEqual(im.info, {'TXT': 'VALUE', 'ZIP': 'VALUE'})
         self.assertEqual(im.text, {'TXT': 'VALUE', 'ZIP': 'VALUE'})
 
+    def test_roundtrip_itxt(self):
+        # Check iTXt roundtripping
+
+        im = Image.new("RGB", (32, 32))
+        info = PngImagePlugin.PngInfo()
+        info.add_itxt("spam", "Eggs", "en", "Spam")
+        info.add_text("eggs", PngImagePlugin.iTXt("Spam", "en", "Eggs"), zip=True)
+
+        im = roundtrip(im, pnginfo=info)
+        self.assertEqual(im.info, {"spam": "Eggs", "eggs": "Spam"})
+        self.assertEqual(im.text, {"spam": "Eggs", "eggs": "Spam"})
+        self.assertEqual(im.text["spam"].lang, "en")
+        self.assertEqual(im.text["spam"].tkey, "Spam")
+        self.assertEqual(im.text["eggs"].lang, "en")
+        self.assertEqual(im.text["eggs"].tkey, "Eggs")
+
     def test_scary(self):
         # Check reading of evil PNG file. For information, see:
         # http://scary.beasts.org/security/CESA-2004-001.txt

From a9f4e30641d9c2d0348d0de1721877998bd7cfaa Mon Sep 17 00:00:00 2001
From: Fredrik Tolf
Date: Wed, 23 Jul 2014 07:43:52 -0700
Subject: [PATCH 4/5] Save detected non-Latin1 characters as iTXt to preserve them.

---
 PIL/PngImagePlugin.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/PIL/PngImagePlugin.py b/PIL/PngImagePlugin.py
index 34fff1a31..4dbedb783 100644
--- a/PIL/PngImagePlugin.py
+++ b/PIL/PngImagePlugin.py
@@ -191,12 +191,15 @@ class PngInfo:
             return self.add_itxt(key, value, value.lang, value.tkey, bool(zip))
 
         # The tEXt chunk stores latin-1 text
+        if not isinstance(value, bytes):
+            try:
+                value = value.encode('latin-1', 'strict')
+            except UnicodeError:
+                return self.add_itxt(key, value, zip=bool(zip))
+
         if not isinstance(key, bytes):
             key = key.encode('latin-1', 'strict')
 
-        if not isinstance(value, bytes):
-            value = value.encode('latin-1', 'replace')
-
         if zip:
             import zlib
             self.add(b"zTXt", key + b"\0\0" + zlib.compress(value))

From 2687b5cb8deea43995aa3dc48579a273e32ea1c3 Mon Sep 17 00:00:00 2001
From: Fredrik Tolf
Date: Wed, 23 Jul 2014 08:17:11 -0700
Subject: [PATCH 5/5] Test unicode preservation in text chunks.

---
 Tests/test_file_png.py | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/Tests/test_file_png.py b/Tests/test_file_png.py
index fbccfb9f6..8ef166347 100644
--- a/Tests/test_file_png.py
+++ b/Tests/test_file_png.py
@@ -281,6 +281,34 @@ class TestFilePng(PillowTestCase):
         self.assertEqual(im.text["eggs"].lang, "en")
         self.assertEqual(im.text["eggs"].tkey, "Eggs")
 
+    def test_nonunicode_text(self):
+        # Check so that non-Unicode text is saved as a tEXt rather than iTXt
+
+        im = Image.new("RGB", (32, 32))
+        info = PngImagePlugin.PngInfo()
+        info.add_text("Text", "Ascii")
+        im = roundtrip(im, pnginfo=info)
+        self.assertEqual(type(im.info["Text"]), str)
+
+    def test_unicode_text(self):
+        # Check preservation of non-ASCII characters on Python3
+        # This cannot really be meaningfully tested on Python2,
+        # since it didn't preserve charsets to begin with.
+
+        def rt_text(value):
+            im = Image.new("RGB", (32, 32))
+            info = PngImagePlugin.PngInfo()
+            info.add_text("Text", value)
+            im = roundtrip(im, pnginfo=info)
+            self.assertEqual(im.info, {"Text": value})
+
+        if str is not bytes:
+            rt_text(" Aa" + chr(0xa0) + chr(0xc4) + chr(0xff))  # Latin1
+            rt_text(chr(0x400) + chr(0x472) + chr(0x4ff))  # Cyrillic
+            rt_text(chr(0x4e00) + chr(0x66f0) +  # CJK
+                    chr(0x9fba) + chr(0x3042) + chr(0xac00))
+            rt_text("A" + chr(0xc4) + chr(0x472) + chr(0x3042))  # Combined
+
     def test_scary(self):
         # Check reading of evil PNG file. For information, see:
         # http://scary.beasts.org/security/CESA-2004-001.txt