diff --git a/Tests/test_file_pdf.py b/Tests/test_file_pdf.py
index ba4523f9c..8edf875cd 100644
--- a/Tests/test_file_pdf.py
+++ b/Tests/test_file_pdf.py
@@ -7,16 +7,13 @@ import tempfile
 
 class TestFilePdf(PillowTestCase):
 
-    def helper_save_as_pdf(self, mode, save_all=False):
+    def helper_save_as_pdf(self, mode, **kwargs):
         # Arrange
         im = hopper(mode)
         outfile = self.tempfile("temp_" + mode + ".pdf")
 
         # Act
-        if save_all:
-            im.save(outfile, save_all=True)
-        else:
-            im.save(outfile)
+        im.save(outfile, **kwargs)
 
         # Assert
         self.assertTrue(os.path.isfile(outfile))
@@ -134,18 +131,18 @@ class TestFilePdf(PillowTestCase):
 
     def test_pdf_append(self):
         # make a PDF file
-        pdf_filename = self.helper_save_as_pdf("RGB")
+        pdf_filename = self.helper_save_as_pdf("RGB", producer="pdfParser")
         # open it, check pages and info
         pdf = pdfParser.PdfParser(pdf_filename)
         self.assertEqual(len(pdf.pages), 1)
-        self.assertEqual(len(pdf.info), 0)
+        self.assertEqual(len(pdf.info), 1)
+        self.assertEqual(pdfParser.decode_text(pdf.info[b"Producer"]), "pdfParser")
         # append some info
-        pdf.info[b"Title"] = b"abc"
-        pdf.info[b"Author"] = b"def"
+        pdf.info[b"Title"] = pdfParser.encode_text("abc")
+        pdf.info[b"Author"] = pdfParser.encode_text("def")
         pdf.info[b"Subject"] = pdfParser.encode_text("ghi")
-        pdf.info[b"Keywords"] = b"jkl"
-        pdf.info[b"Creator"] = b"hopper()"
-        pdf.info[b"Producer"] = b"pdfParser"
+        pdf.info[b"Keywords"] = pdfParser.encode_text("jkl")
+        pdf.info[b"Creator"] = pdfParser.encode_text("hopper()")
         with open(pdf_filename, "r+b") as f:
             f.seek(0, os.SEEK_END)
             pdf.write_xref_and_trailer(f)
@@ -153,7 +150,7 @@ class TestFilePdf(PillowTestCase):
         pdf = pdfParser.PdfParser(pdf_filename)
         self.assertEqual(len(pdf.pages), 1)
         self.assertEqual(len(pdf.info), 6)
-        self.assertEqual(pdf.info[b"Title"], b"abc")
+        self.assertEqual(pdfParser.decode_text(pdf.info[b"Title"]), "abc")
         # append two images
         mode_CMYK = hopper("CMYK")
         mode_P = hopper("P")
@@ -162,7 +159,8 @@ class TestFilePdf(PillowTestCase):
         pdf = pdfParser.PdfParser(pdf_filename)
         self.assertEqual(len(pdf.pages), 3)
         self.assertEqual(len(pdf.info), 6)
-        self.assertEqual(pdf.info[b"Title"], b"abc")
+        self.assertEqual(pdfParser.decode_text(pdf.info[b"Title"]), "abc")
+        self.assertEqual(pdfParser.decode_text(pdf.info[b"Producer"]), "pdfParser")
 
     def test_pdf_parser(self):
         pdfParser.selftest()
diff --git a/src/PIL/PdfImagePlugin.py b/src/PIL/PdfImagePlugin.py
index be467d014..c836175d3 100644
--- a/src/PIL/PdfImagePlugin.py
+++ b/src/PIL/PdfImagePlugin.py
@@ -48,12 +48,32 @@ def _save_all(im, fp, filename):
 def _save(im, fp, filename, save_all=False):
     resolution = im.encoderinfo.get("resolution", 72.0)
     is_appending = im.encoderinfo.get("append", False)
+    title = im.encoderinfo.get("title", None)
+    author = im.encoderinfo.get("author", None)
+    subject = im.encoderinfo.get("subject", None)
+    keywords = im.encoderinfo.get("keywords", None)
+    creator = im.encoderinfo.get("creator", None)
+    producer = im.encoderinfo.get("producer", None)
+
     if is_appending:
         existing_pdf = pdfParser.PdfParser(f=fp, filename=filename)
         fp.seek(0, io.SEEK_END)
     else:
         existing_pdf = pdfParser.PdfParser()
 
+    if title:
+        existing_pdf.info[b"Title"] = pdfParser.encode_text(title)
+    if author:
+        existing_pdf.info[b"Author"] = pdfParser.encode_text(author)
+    if subject:
+        existing_pdf.info[b"Subject"] = pdfParser.encode_text(subject)
+    if keywords:
+        existing_pdf.info[b"Keywords"] = pdfParser.encode_text(keywords)
+    if creator:
+        existing_pdf.info[b"Creator"] = pdfParser.encode_text(creator)
+    if producer:
+        existing_pdf.info[b"Producer"] = pdfParser.encode_text(producer)
+
     #
     # make sure image data is available
     im.load()
diff --git a/src/PIL/pdfParser.py b/src/PIL/pdfParser.py
index 63ae5c4e7..3a386f493 100644
--- a/src/PIL/pdfParser.py
+++ b/src/PIL/pdfParser.py
@@ -18,10 +18,63 @@ else: # Python 3.x
         return s.encode("us-ascii")
 
 
+# see 7.9.2.2 Text String Type on page 86 and D.3 PDFDocEncoding Character Set on page 656
 def encode_text(s):
     return codecs.BOM_UTF16_BE + s.encode("utf_16_be")
 
 
+PDFDocEncoding = {
+    0x16: u"\u0017",
+    0x18: u"\u02D8",
+    0x19: u"\u02C7",
+    0x1A: u"\u02C6",
+    0x1B: u"\u02D9",
+    0x1C: u"\u02DD",
+    0x1D: u"\u02DB",
+    0x1E: u"\u02DA",
+    0x1F: u"\u02DC",
+    0x80: u"\u2022",
+    0x81: u"\u2020",
+    0x82: u"\u2021",
+    0x83: u"\u2026",
+    0x84: u"\u2014",
+    0x85: u"\u2013",
+    0x86: u"\u0192",
+    0x87: u"\u2044",
+    0x88: u"\u2039",
+    0x89: u"\u203A",
+    0x8A: u"\u2212",
+    0x8B: u"\u2030",
+    0x8C: u"\u201E",
+    0x8D: u"\u201C",
+    0x8E: u"\u201D",
+    0x8F: u"\u2018",
+    0x90: u"\u2019",
+    0x91: u"\u201A",
+    0x92: u"\u2122",
+    0x93: u"\uFB01",
+    0x94: u"\uFB02",
+    0x95: u"\u0141",
+    0x96: u"\u0152",
+    0x97: u"\u0160",
+    0x98: u"\u0178",
+    0x99: u"\u017D",
+    0x9A: u"\u0131",
+    0x9B: u"\u0142",
+    0x9C: u"\u0153",
+    0x9D: u"\u0161",
+    0x9E: u"\u017E",
+    0xA0: u"\u20AC",
+    }
+def decode_text(b):
+    if b[:len(codecs.BOM_UTF16_BE)] == codecs.BOM_UTF16_BE:
+        return b[len(codecs.BOM_UTF16_BE):].decode("utf_16_be")
+    elif str == bytes: # Python 2.x
+        return u"".join(PDFDocEncoding.get(ord(byte), byte) for byte in b)
+    else:
+        return "".join(PDFDocEncoding.get(byte, chr(byte)) for byte in b)
+
+
 class PdfFormatError(RuntimeError):
     pass
 
@@ -667,6 +720,10 @@ class PdfParser:
 
 
 def selftest():
+    assert encode_text("abc") == b"\xFE\xFF\x00a\x00b\x00c"
+    assert decode_text(b"\xFE\xFF\x00a\x00b\x00c") == "abc"
+    assert decode_text(b"abc") == "abc"
+    assert decode_text(b"\x1B a \x1C") == u"\u02D9 a \u02DD"
     assert PdfParser.interpret_name(b"Name#23Hash") == b"Name#Hash"
     assert PdfParser.interpret_name(b"Name#23Hash", as_text=True) == "Name#Hash"
     assert IndirectReference(1,2) == IndirectReference(1,2)